Rss20.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. <?php
  2. namespace PicoFeed\Parser;
  3. use SimpleXMLElement;
  4. use PicoFeed\Filter\Filter;
  5. use PicoFeed\Client\Url;
  6. /**
  7. * RSS 2.0 Parser.
  8. *
  9. * @package PicoFeed\Parser
  10. * @author Frederic Guillot
  11. */
  12. class Rss20 extends Parser
  13. {
  14. /**
  15. * Supported namespaces.
  16. */
  17. protected $namespaces = array(
  18. 'dc' => 'http://purl.org/dc/elements/1.1/',
  19. 'content' => 'http://purl.org/rss/1.0/modules/content/',
  20. 'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
  21. 'atom' => 'http://www.w3.org/2005/Atom',
  22. );
  23. /**
  24. * Get the path to the items XML tree.
  25. *
  26. * @param SimpleXMLElement $xml Feed xml
  27. * @return SimpleXMLElement[]
  28. */
  29. public function getItemsTree(SimpleXMLElement $xml)
  30. {
  31. return XmlParser::getXPathResult($xml, 'channel/item');
  32. }
  33. /**
  34. * Find the feed url.
  35. *
  36. * @param SimpleXMLElement $xml Feed xml
  37. * @param \PicoFeed\Parser\Feed $feed Feed object
  38. */
  39. public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
  40. {
  41. $feed->setFeedUrl('');
  42. }
  43. /**
  44. * Find the site url.
  45. *
  46. * @param SimpleXMLElement $xml Feed xml
  47. * @param \PicoFeed\Parser\Feed $feed Feed object
  48. */
  49. public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
  50. {
  51. $value = XmlParser::getXPathResult($xml, 'channel/link');
  52. $feed->setSiteUrl(XmlParser::getValue($value));
  53. }
  54. /**
  55. * Find the feed description.
  56. *
  57. * @param SimpleXMLElement $xml Feed xml
  58. * @param \PicoFeed\Parser\Feed $feed Feed object
  59. */
  60. public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
  61. {
  62. $value = XmlParser::getXPathResult($xml, 'channel/description');
  63. $feed->setDescription(XmlParser::getValue($value));
  64. }
  65. /**
  66. * Find the feed logo url.
  67. *
  68. * @param SimpleXMLElement $xml Feed xml
  69. * @param \PicoFeed\Parser\Feed $feed Feed object
  70. */
  71. public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
  72. {
  73. $value = XmlParser::getXPathResult($xml, 'channel/image/url');
  74. $feed->setLogo(XmlParser::getValue($value));
  75. }
  76. /**
  77. * Find the feed icon.
  78. *
  79. * @param SimpleXMLElement $xml Feed xml
  80. * @param \PicoFeed\Parser\Feed $feed Feed object
  81. */
  82. public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
  83. {
  84. $feed->setIcon('');
  85. }
  86. /**
  87. * Find the feed title.
  88. *
  89. * @param SimpleXMLElement $xml Feed xml
  90. * @param \PicoFeed\Parser\Feed $feed Feed object
  91. */
  92. public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
  93. {
  94. $title = XmlParser::getXPathResult($xml, 'channel/title');
  95. $feed->setTitle(Filter::stripWhiteSpace(XmlParser::getValue($title)) ?: $feed->getSiteUrl());
  96. }
  97. /**
  98. * Find the feed language.
  99. *
  100. * @param SimpleXMLElement $xml Feed xml
  101. * @param \PicoFeed\Parser\Feed $feed Feed object
  102. */
  103. public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
  104. {
  105. $value = XmlParser::getXPathResult($xml, 'channel/language');
  106. $feed->setLanguage(XmlParser::getValue($value));
  107. }
  108. /**
  109. * Find the feed id.
  110. *
  111. * @param SimpleXMLElement $xml Feed xml
  112. * @param \PicoFeed\Parser\Feed $feed Feed object
  113. */
  114. public function findFeedId(SimpleXMLElement $xml, Feed $feed)
  115. {
  116. $feed->setId($feed->getFeedUrl() ?: $feed->getSiteUrl());
  117. }
  118. /**
  119. * Find the feed date.
  120. *
  121. * @param SimpleXMLElement $xml Feed xml
  122. * @param \PicoFeed\Parser\Feed $feed Feed object
  123. */
  124. public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
  125. {
  126. $publish_date = XmlParser::getXPathResult($xml, 'channel/pubDate');
  127. $update_date = XmlParser::getXPathResult($xml, 'channel/lastBuildDate');
  128. $published = !empty($publish_date) ? $this->getDateParser()->getDateTime(XmlParser::getValue($publish_date)) : null;
  129. $updated = !empty($update_date) ? $this->getDateParser()->getDateTime(XmlParser::getValue($update_date)) : null;
  130. if ($published === null && $updated === null) {
  131. $feed->setDate($this->getDateParser()->getCurrentDateTime()); // We use the current date if there is no date for the feed
  132. } elseif ($published !== null && $updated !== null) {
  133. $feed->setDate(max($published, $updated)); // We use the most recent date between published and updated
  134. } else {
  135. $feed->setDate($updated ?: $published);
  136. }
  137. }
  138. /**
  139. * Find the item published date.
  140. *
  141. * @param SimpleXMLElement $entry Feed item
  142. * @param Item $item Item object
  143. * @param \PicoFeed\Parser\Feed $feed Feed object
  144. */
  145. public function findItemPublishedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
  146. {
  147. $date = XmlParser::getXPathResult($entry, 'pubDate');
  148. $item->setPublishedDate(!empty($date) ? $this->getDateParser()->getDateTime(XmlParser::getValue($date)) : null);
  149. }
  150. /**
  151. * Find the item updated date.
  152. *
  153. * @param SimpleXMLElement $entry Feed item
  154. * @param Item $item Item object
  155. * @param \PicoFeed\Parser\Feed $feed Feed object
  156. */
  157. public function findItemUpdatedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
  158. {
  159. if ($item->publishedDate === null) {
  160. $this->findItemPublishedDate($entry, $item, $feed);
  161. }
  162. $item->setUpdatedDate($item->getPublishedDate()); // No updated date in RSS 2.0 specifications
  163. }
  164. /**
  165. * Find the item title.
  166. *
  167. * @param SimpleXMLElement $entry Feed item
  168. * @param \PicoFeed\Parser\Item $item Item object
  169. */
  170. public function findItemTitle(SimpleXMLElement $entry, Item $item)
  171. {
  172. $value = XmlParser::getXPathResult($entry, 'title');
  173. $item->setTitle(Filter::stripWhiteSpace(XmlParser::getValue($value)) ?: $item->getUrl());
  174. }
  175. /**
  176. * Find the item author.
  177. *
  178. * @param SimpleXMLElement $xml Feed
  179. * @param SimpleXMLElement $entry Feed item
  180. * @param \PicoFeed\Parser\Item $item Item object
  181. */
  182. public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
  183. {
  184. $value = XmlParser::getXPathResult($entry, 'dc:creator', $this->namespaces)
  185. ?: XmlParser::getXPathResult($entry, 'author')
  186. ?: XmlParser::getXPathResult($xml, 'channel/dc:creator', $this->namespaces)
  187. ?: XmlParser::getXPathResult($xml, 'channel/managingEditor');
  188. $item->setAuthor(XmlParser::getValue($value));
  189. }
  190. /**
  191. * Find the item author URL.
  192. *
  193. * @param SimpleXMLElement $xml Feed
  194. * @param SimpleXMLElement $entry Feed item
  195. * @param \PicoFeed\Parser\Item $item Item object
  196. */
  197. public function findItemAuthorUrl(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
  198. {
  199. // There appears to be no support for author URL in the dc: terms or author element
  200. $item->setAuthorUrl('');
  201. }
  202. /**
  203. * Find the item content.
  204. *
  205. * @param SimpleXMLElement $entry Feed item
  206. * @param \PicoFeed\Parser\Item $item Item object
  207. */
  208. public function findItemContent(SimpleXMLElement $entry, Item $item)
  209. {
  210. $content = XmlParser::getXPathResult($entry, 'content:encoded', $this->namespaces);
  211. if (XmlParser::getValue($content) === '') {
  212. $content = XmlParser::getXPathResult($entry, 'description');
  213. }
  214. $item->setContent(XmlParser::getValue($content));
  215. }
  216. /**
  217. * Find the item URL.
  218. *
  219. * @param SimpleXMLElement $entry Feed item
  220. * @param \PicoFeed\Parser\Item $item Item object
  221. */
  222. public function findItemUrl(SimpleXMLElement $entry, Item $item)
  223. {
  224. $link = XmlParser::getXPathResult($entry, 'feedburner:origLink', $this->namespaces)
  225. ?: XmlParser::getXPathResult($entry, 'link')
  226. ?: XmlParser::getXPathResult($entry, 'atom:link/@href', $this->namespaces);
  227. if (!empty($link)) {
  228. $item->setUrl(XmlParser::getValue($link));
  229. } else {
  230. $link = XmlParser::getXPathResult($entry, 'guid');
  231. $link = XmlParser::getValue($link);
  232. if (filter_var($link, FILTER_VALIDATE_URL) !== false) {
  233. $item->setUrl($link);
  234. }
  235. }
  236. }
  237. /**
  238. * Genereate the item id.
  239. *
  240. * @param SimpleXMLElement $entry Feed item
  241. * @param \PicoFeed\Parser\Item $item Item object
  242. * @param \PicoFeed\Parser\Feed $feed Feed object
  243. */
  244. public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
  245. {
  246. $id = XmlParser::getValue(XmlParser::getXPathResult($entry, 'guid'));
  247. if ($id) {
  248. $item->setId($this->generateId($id));
  249. } else {
  250. $item->setId($this->generateId(
  251. $item->getTitle(), $item->getUrl(), $item->getContent()
  252. ));
  253. }
  254. }
  255. /**
  256. * Find the item enclosure.
  257. *
  258. * @param SimpleXMLElement $entry Feed item
  259. * @param \PicoFeed\Parser\Item $item Item object
  260. * @param \PicoFeed\Parser\Feed $feed Feed object
  261. */
  262. public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
  263. {
  264. if (isset($entry->enclosure)) {
  265. $type = XmlParser::getXPathResult($entry, 'enclosure/@type');
  266. $url = XmlParser::getXPathResult($entry, 'feedburner:origEnclosureLink', $this->namespaces)
  267. ?: XmlParser::getXPathResult($entry, 'enclosure/@url');
  268. $item->setEnclosureUrl(Url::resolve(XmlParser::getValue($url), $feed->getSiteUrl()));
  269. $item->setEnclosureType(XmlParser::getValue($type));
  270. }
  271. }
  272. /**
  273. * Find the item language.
  274. *
  275. * @param SimpleXMLElement $entry Feed item
  276. * @param \PicoFeed\Parser\Item $item Item object
  277. * @param \PicoFeed\Parser\Feed $feed Feed object
  278. */
  279. public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
  280. {
  281. $language = XmlParser::getXPathResult($entry, 'dc:language', $this->namespaces);
  282. $item->setLanguage(XmlParser::getValue($language) ?: $feed->getLanguage());
  283. }
  284. /**
  285. * Find the item categories.
  286. *
  287. * @param SimpleXMLElement $entry Feed item
  288. * @param Item $item Item object
  289. * @param Feed $feed Feed object
  290. */
  291. public function findItemCategories(SimpleXMLElement $entry, Item $item, Feed $feed)
  292. {
  293. $categories = XmlParser::getXPathResult($entry, 'category');
  294. $item->setCategoriesFromXml($categories);
  295. }
  296. }