Rss20.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. <?php
  2. namespace PicoFeed\Parser;
  3. use SimpleXMLElement;
  4. use PicoFeed\Filter\Filter;
  5. use PicoFeed\Client\Url;
  6. /**
  7. * RSS 2.0 Parser.
  8. *
  9. * @package PicoFeed\Parser
  10. * @author Frederic Guillot
  11. */
  12. class Rss20 extends Parser
  13. {
  14. /**
  15. * Supported namespaces.
  16. */
  17. protected $namespaces = array(
  18. 'dc' => 'http://purl.org/dc/elements/1.1/',
  19. 'content' => 'http://purl.org/rss/1.0/modules/content/',
  20. 'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
  21. 'atom' => 'http://www.w3.org/2005/Atom',
  22. );
  23. /**
  24. * Get the path to the items XML tree.
  25. *
  26. * @param SimpleXMLElement $xml Feed xml
  27. * @return SimpleXMLElement[]
  28. */
  29. public function getItemsTree(SimpleXMLElement $xml)
  30. {
  31. return XmlParser::getXPathResult($xml, 'channel/item');
  32. }
  33. /**
  34. * Find the feed url.
  35. *
  36. * @param SimpleXMLElement $xml Feed xml
  37. * @param \PicoFeed\Parser\Feed $feed Feed object
  38. */
  39. public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
  40. {
  41. $feed->setFeedUrl('');
  42. }
  43. /**
  44. * Find the site url.
  45. *
  46. * @param SimpleXMLElement $xml Feed xml
  47. * @param \PicoFeed\Parser\Feed $feed Feed object
  48. */
  49. public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
  50. {
  51. $value = XmlParser::getXPathResult($xml, 'channel/link');
  52. $feed->setSiteUrl(XmlParser::getValue($value));
  53. }
  54. /**
  55. * Find the feed description.
  56. *
  57. * @param SimpleXMLElement $xml Feed xml
  58. * @param \PicoFeed\Parser\Feed $feed Feed object
  59. */
  60. public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
  61. {
  62. $value = XmlParser::getXPathResult($xml, 'channel/description');
  63. $feed->setDescription(XmlParser::getValue($value));
  64. }
  65. /**
  66. * Find the feed logo url.
  67. *
  68. * @param SimpleXMLElement $xml Feed xml
  69. * @param \PicoFeed\Parser\Feed $feed Feed object
  70. */
  71. public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
  72. {
  73. $value = XmlParser::getXPathResult($xml, 'channel/image/url');
  74. $feed->setLogo(XmlParser::getValue($value));
  75. }
  76. /**
  77. * Find the feed icon.
  78. *
  79. * @param SimpleXMLElement $xml Feed xml
  80. * @param \PicoFeed\Parser\Feed $feed Feed object
  81. */
  82. public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
  83. {
  84. $feed->setIcon('');
  85. }
  86. /**
  87. * Find the feed title.
  88. *
  89. * @param SimpleXMLElement $xml Feed xml
  90. * @param \PicoFeed\Parser\Feed $feed Feed object
  91. */
  92. public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
  93. {
  94. $title = XmlParser::getXPathResult($xml, 'channel/title');
  95. $feed->setTitle(Filter::stripWhiteSpace(XmlParser::getValue($title)) ?: $feed->getSiteUrl());
  96. }
  97. /**
  98. * Find the feed language.
  99. *
  100. * @param SimpleXMLElement $xml Feed xml
  101. * @param \PicoFeed\Parser\Feed $feed Feed object
  102. */
  103. public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
  104. {
  105. $value = XmlParser::getXPathResult($xml, 'channel/language');
  106. $feed->setLanguage(XmlParser::getValue($value));
  107. }
  108. /**
  109. * Find the feed id.
  110. *
  111. * @param SimpleXMLElement $xml Feed xml
  112. * @param \PicoFeed\Parser\Feed $feed Feed object
  113. */
  114. public function findFeedId(SimpleXMLElement $xml, Feed $feed)
  115. {
  116. $feed->setId($feed->getFeedUrl() ?: $feed->getSiteUrl());
  117. }
  118. /**
  119. * Find the feed date.
  120. *
  121. * @param SimpleXMLElement $xml Feed xml
  122. * @param \PicoFeed\Parser\Feed $feed Feed object
  123. */
  124. public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
  125. {
  126. $publish_date = XmlParser::getXPathResult($xml, 'channel/pubDate');
  127. $update_date = XmlParser::getXPathResult($xml, 'channel/lastBuildDate');
  128. $published = !empty($publish_date) ? $this->getDateParser()->getDateTime(XmlParser::getValue($publish_date)) : null;
  129. $updated = !empty($update_date) ? $this->getDateParser()->getDateTime(XmlParser::getValue($update_date)) : null;
  130. if ($published === null && $updated === null) {
  131. $feed->setDate($this->getDateParser()->getCurrentDateTime()); // We use the current date if there is no date for the feed
  132. } elseif ($published !== null && $updated !== null) {
  133. $feed->setDate(max($published, $updated)); // We use the most recent date between published and updated
  134. } else {
  135. $feed->setDate($updated ?: $published);
  136. }
  137. }
  138. /**
  139. * Find the item published date.
  140. *
  141. * @param SimpleXMLElement $entry Feed item
  142. * @param Item $item Item object
  143. * @param \PicoFeed\Parser\Feed $feed Feed object
  144. */
  145. public function findItemPublishedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
  146. {
  147. $date = XmlParser::getXPathResult($entry, 'pubDate');
  148. $item->setPublishedDate(!empty($date) ? $this->getDateParser()->getDateTime(XmlParser::getValue($date)) : null);
  149. }
  150. /**
  151. * Find the item updated date.
  152. *
  153. * @param SimpleXMLElement $entry Feed item
  154. * @param Item $item Item object
  155. * @param \PicoFeed\Parser\Feed $feed Feed object
  156. */
  157. public function findItemUpdatedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
  158. {
  159. if ($item->publishedDate === null) {
  160. $this->findItemPublishedDate($entry, $item, $feed);
  161. }
  162. $item->setUpdatedDate($item->getPublishedDate()); // No updated date in RSS 2.0 specifications
  163. }
  164. /**
  165. * Find the item title.
  166. *
  167. * @param SimpleXMLElement $entry Feed item
  168. * @param \PicoFeed\Parser\Item $item Item object
  169. */
  170. public function findItemTitle(SimpleXMLElement $entry, Item $item)
  171. {
  172. $value = XmlParser::getXPathResult($entry, 'title');
  173. $item->setTitle(Filter::stripWhiteSpace(XmlParser::getValue($value)) ?: $item->getUrl());
  174. }
  175. /**
  176. * Find the item author.
  177. *
  178. * @param SimpleXMLElement $xml Feed
  179. * @param SimpleXMLElement $entry Feed item
  180. * @param \PicoFeed\Parser\Item $item Item object
  181. */
  182. public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
  183. {
  184. $value = XmlParser::getXPathResult($entry, 'dc:creator', $this->namespaces)
  185. ?: XmlParser::getXPathResult($entry, 'author')
  186. ?: XmlParser::getXPathResult($xml, 'channel/dc:creator', $this->namespaces)
  187. ?: XmlParser::getXPathResult($xml, 'channel/managingEditor');
  188. $item->setAuthor(XmlParser::getValue($value));
  189. }
  190. /**
  191. * Find the item content.
  192. *
  193. * @param SimpleXMLElement $entry Feed item
  194. * @param \PicoFeed\Parser\Item $item Item object
  195. */
  196. public function findItemContent(SimpleXMLElement $entry, Item $item)
  197. {
  198. $content = XmlParser::getXPathResult($entry, 'content:encoded', $this->namespaces);
  199. if (XmlParser::getValue($content) === '') {
  200. $content = XmlParser::getXPathResult($entry, 'description');
  201. }
  202. $item->setContent(XmlParser::getValue($content));
  203. }
  204. /**
  205. * Find the item URL.
  206. *
  207. * @param SimpleXMLElement $entry Feed item
  208. * @param \PicoFeed\Parser\Item $item Item object
  209. */
  210. public function findItemUrl(SimpleXMLElement $entry, Item $item)
  211. {
  212. $link = XmlParser::getXPathResult($entry, 'feedburner:origLink', $this->namespaces)
  213. ?: XmlParser::getXPathResult($entry, 'link')
  214. ?: XmlParser::getXPathResult($entry, 'atom:link/@href', $this->namespaces);
  215. if (!empty($link)) {
  216. $item->setUrl(XmlParser::getValue($link));
  217. } else {
  218. $link = XmlParser::getXPathResult($entry, 'guid');
  219. $link = XmlParser::getValue($link);
  220. if (filter_var($link, FILTER_VALIDATE_URL) !== false) {
  221. $item->setUrl($link);
  222. }
  223. }
  224. }
  225. /**
  226. * Genereate the item id.
  227. *
  228. * @param SimpleXMLElement $entry Feed item
  229. * @param \PicoFeed\Parser\Item $item Item object
  230. * @param \PicoFeed\Parser\Feed $feed Feed object
  231. */
  232. public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
  233. {
  234. $id = XmlParser::getValue(XmlParser::getXPathResult($entry, 'guid'));
  235. if ($id) {
  236. $item->setId($this->generateId($id));
  237. } else {
  238. $item->setId($this->generateId(
  239. $item->getTitle(), $item->getUrl(), $item->getContent()
  240. ));
  241. }
  242. }
  243. /**
  244. * Find the item enclosure.
  245. *
  246. * @param SimpleXMLElement $entry Feed item
  247. * @param \PicoFeed\Parser\Item $item Item object
  248. * @param \PicoFeed\Parser\Feed $feed Feed object
  249. */
  250. public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
  251. {
  252. if (isset($entry->enclosure)) {
  253. $type = XmlParser::getXPathResult($entry, 'enclosure/@type');
  254. $url = XmlParser::getXPathResult($entry, 'feedburner:origEnclosureLink', $this->namespaces)
  255. ?: XmlParser::getXPathResult($entry, 'enclosure/@url');
  256. $item->setEnclosureUrl(Url::resolve(XmlParser::getValue($url), $feed->getSiteUrl()));
  257. $item->setEnclosureType(XmlParser::getValue($type));
  258. }
  259. }
  260. /**
  261. * Find the item language.
  262. *
  263. * @param SimpleXMLElement $entry Feed item
  264. * @param \PicoFeed\Parser\Item $item Item object
  265. * @param \PicoFeed\Parser\Feed $feed Feed object
  266. */
  267. public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
  268. {
  269. $language = XmlParser::getXPathResult($entry, 'dc:language', $this->namespaces);
  270. $item->setLanguage(XmlParser::getValue($language) ?: $feed->getLanguage());
  271. }
  272. /**
  273. * Find the item categories.
  274. *
  275. * @param SimpleXMLElement $entry Feed item
  276. * @param Item $item Item object
  277. * @param Feed $feed Feed object
  278. */
  279. public function findItemCategories(SimpleXMLElement $entry, Item $item, Feed $feed)
  280. {
  281. $categories = XmlParser::getXPathResult($entry, 'category');
  282. $item->setCategoriesFromXml($categories);
  283. }
  284. }