Atom.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. <?php
  2. namespace PicoFeed\Parser;
  3. use SimpleXMLElement;
  4. use PicoFeed\Filter\Filter;
  5. use PicoFeed\Client\Url;
  6. /**
  7. * Atom parser.
  8. *
  9. * @package PicoFeed\Parser
  10. * @author Frederic Guillot
  11. */
  class Atom extends Parser
  {
      /**
       * Supported namespaces, keyed by the prefix used in this parser's XPath queries.
       *
       * @var array
       */
      protected $namespaces = array(
          'atom' => 'http://www.w3.org/2005/Atom',
      );

      /**
       * Get the path to the items XML tree.
       *
       * Queries "atom:entry" first and falls back to an un-prefixed "entry".
       *
       * @param SimpleXMLElement $xml Feed xml
       * @return SimpleXMLElement[]
       */
      public function getItemsTree(SimpleXMLElement $xml)
      {
          return $this->queryAtomOrPlain($xml, 'atom:entry');
      }

      /**
       * Find the feed url.
       *
       * Uses the href of the link whose rel attribute is "self"; an empty
       * string is stored when there is no such link.
       *
       * @param SimpleXMLElement      $xml  Feed xml
       * @param \PicoFeed\Parser\Feed $feed Feed object
       */
      public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
      {
          $feed->setFeedUrl($this->getUrl($xml, 'self'));
      }

      /**
       * Find the site url.
       *
       * Uses the href of the rel="alternate" link, or of a link with no rel
       * value when no alternate link exists.
       *
       * @param SimpleXMLElement      $xml  Feed xml
       * @param \PicoFeed\Parser\Feed $feed Feed object
       */
      public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
      {
          $feed->setSiteUrl($this->getUrl($xml, 'alternate', true));
      }

      /**
       * Find the feed description (the "subtitle" element).
       *
       * @param SimpleXMLElement      $xml  Feed xml
       * @param \PicoFeed\Parser\Feed $feed Feed object
       */
      public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
      {
          $description = $this->queryAtomOrPlain($xml, 'atom:subtitle');

          $feed->setDescription(XmlParser::getValue($description));
      }

      /**
       * Find the feed logo url (the "logo" element).
       *
       * @param SimpleXMLElement      $xml  Feed xml
       * @param \PicoFeed\Parser\Feed $feed Feed object
       */
      public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
      {
          $logo = $this->queryAtomOrPlain($xml, 'atom:logo');

          $feed->setLogo(XmlParser::getValue($logo));
      }

      /**
       * Find the feed icon (the "icon" element).
       *
       * @param SimpleXMLElement      $xml  Feed xml
       * @param \PicoFeed\Parser\Feed $feed Feed object
       */
      public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
      {
          $icon = $this->queryAtomOrPlain($xml, 'atom:icon');

          $feed->setIcon(XmlParser::getValue($icon));
      }

      /**
       * Find the feed title.
       *
       * When the whitespace-stripped title is empty, the site url already
       * stored on $feed is used instead, so the site url should be resolved
       * before this method is called.
       *
       * @param SimpleXMLElement      $xml  Feed xml
       * @param \PicoFeed\Parser\Feed $feed Feed object
       */
      public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
      {
          $title = $this->queryAtomOrPlain($xml, 'atom:title');

          $feed->setTitle(Filter::stripWhiteSpace(XmlParser::getValue($title)) ?: $feed->getSiteUrl());
      }

      /**
       * Find the feed language.
       *
       * Looks for an xml:lang attribute on the direct children that are not
       * atom:entry elements, then on the context node itself.
       *
       * @param SimpleXMLElement      $xml  Feed xml
       * @param \PicoFeed\Parser\Feed $feed Feed object
       */
      public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
      {
          $language = XmlParser::getXPathResult($xml, '*[not(self::atom:entry)]/@xml:lang', $this->namespaces)
              ?: XmlParser::getXPathResult($xml, '@xml:lang');

          $feed->setLanguage(XmlParser::getValue($language));
      }

      /**
       * Find the feed id (the "id" element).
       *
       * @param SimpleXMLElement      $xml  Feed xml
       * @param \PicoFeed\Parser\Feed $feed Feed object
       */
      public function findFeedId(SimpleXMLElement $xml, Feed $feed)
      {
          $id = $this->queryAtomOrPlain($xml, 'atom:id');

          $feed->setId(XmlParser::getValue($id));
      }

      /**
       * Find the feed date (the "updated" element).
       *
       * The raw value is handed to the date parser returned by getDateParser().
       *
       * @param SimpleXMLElement      $xml  Feed xml
       * @param \PicoFeed\Parser\Feed $feed Feed object
       */
      public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
      {
          $updated = $this->queryAtomOrPlain($xml, 'atom:updated');

          $feed->setDate($this->getDateParser()->getDateTime(XmlParser::getValue($updated)));
      }

      /**
       * Find the item published date.
       *
       * Sets null on the item when the entry has no "published" element.
       *
       * @param SimpleXMLElement      $entry Feed item
       * @param Item                  $item  Item object
       * @param \PicoFeed\Parser\Feed $feed  Feed object
       */
      public function findItemPublishedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
      {
          $date = $this->queryAtomOrPlain($entry, 'atom:published');

          $item->setPublishedDate(
              !empty($date) ? $this->getDateParser()->getDateTime((string) current($date)) : null
          );
      }

      /**
       * Find the item updated date.
       *
       * Sets null on the item when the entry has no "updated" element.
       *
       * @param SimpleXMLElement      $entry Feed item
       * @param Item                  $item  Item object
       * @param \PicoFeed\Parser\Feed $feed  Feed object
       */
      public function findItemUpdatedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
      {
          $date = $this->queryAtomOrPlain($entry, 'atom:updated');

          $item->setUpdatedDate(
              !empty($date) ? $this->getDateParser()->getDateTime((string) current($date)) : null
          );
      }

      /**
       * Find the item title.
       *
       * When the whitespace-stripped title is empty, the url already stored
       * on $item is used instead, so the item url should be resolved before
       * this method is called.
       *
       * @param SimpleXMLElement $entry Feed item
       * @param Item             $item  Item object
       */
      public function findItemTitle(SimpleXMLElement $entry, Item $item)
      {
          $title = $this->queryAtomOrPlain($entry, 'atom:title');

          $item->setTitle(Filter::stripWhiteSpace(XmlParser::getValue($title)) ?: $item->getUrl());
      }

      /**
       * Find the item author.
       *
       * The author name of the entry is preferred; the author name declared
       * on the feed is used when the entry has none.
       *
       * @param SimpleXMLElement      $xml   Feed
       * @param SimpleXMLElement      $entry Feed item
       * @param \PicoFeed\Parser\Item $item  Item object
       */
      public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
      {
          $author = $this->queryAtomOrPlain($entry, 'atom:author/atom:name')
              ?: $this->queryAtomOrPlain($xml, 'atom:author/atom:name');

          $item->setAuthor(XmlParser::getValue($author));
      }

      /**
       * Find the item author URL.
       *
       * The author uri of the entry is preferred; the author uri declared on
       * the feed is used when the entry has none.
       *
       * @param SimpleXMLElement      $xml   Feed
       * @param SimpleXMLElement      $entry Feed item
       * @param \PicoFeed\Parser\Item $item  Item object
       */
      public function findItemAuthorUrl(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
      {
          $authorUrl = $this->queryAtomOrPlain($entry, 'atom:author/atom:uri')
              ?: $this->queryAtomOrPlain($xml, 'atom:author/atom:uri');

          $item->setAuthorUrl(XmlParser::getValue($authorUrl));
      }

      /**
       * Find the item content.
       *
       * @param SimpleXMLElement      $entry Feed item
       * @param \PicoFeed\Parser\Item $item  Item object
       */
      public function findItemContent(SimpleXMLElement $entry, Item $item)
      {
          $item->setContent($this->getContent($entry));
      }

      /**
       * Find the item URL.
       *
       * Uses the href of the rel="alternate" link, or of a link with no rel
       * value when no alternate link exists.
       *
       * @param SimpleXMLElement      $entry Feed item
       * @param \PicoFeed\Parser\Item $item  Item object
       */
      public function findItemUrl(SimpleXMLElement $entry, Item $item)
      {
          $item->setUrl($this->getUrl($entry, 'alternate', true));
      }

      /**
       * Generate the item id.
       *
       * The id is derived from the entry's "id" element. When that element is
       * missing or blank, it is derived from the title, url and content
       * already stored on $item, so those must be resolved beforehand.
       *
       * @param SimpleXMLElement      $entry Feed item
       * @param \PicoFeed\Parser\Item $item  Item object
       * @param \PicoFeed\Parser\Feed $feed  Feed object
       */
      public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
      {
          $id = $this->queryAtomOrPlain($entry, 'atom:id');
          $value = empty($id) ? '' : XmlParser::getValue($id);

          // An <id> element that is present but blank carries no identity:
          // hashing it would give every such entry the same generated id.
          if (trim((string) $value) !== '') {
              $item->setId($this->generateId($value));

              return;
          }

          $item->setId($this->generateId(
              $item->getTitle(), $item->getUrl(), $item->getContent()
          ));
      }

      /**
       * Find the item enclosure.
       *
       * Reads the link whose rel attribute is "enclosure"; its href is
       * resolved against the feed's site url. Nothing is set on the item
       * when the entry has no such link.
       *
       * @param SimpleXMLElement      $entry Feed item
       * @param \PicoFeed\Parser\Item $item  Item object
       * @param \PicoFeed\Parser\Feed $feed  Feed object
       */
      public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
      {
          $enclosure = $this->findLink($entry, 'enclosure');

          if ($enclosure) {
              $item->setEnclosureUrl(Url::resolve((string) $enclosure['href'], $feed->getSiteUrl()));
              $item->setEnclosureType((string) $enclosure['type']);
          }
      }

      /**
       * Find the item language.
       *
       * Uses the xml:lang attributes found on the entry or its descendants,
       * and falls back to the language already stored on $feed.
       *
       * @param SimpleXMLElement      $entry Feed item
       * @param \PicoFeed\Parser\Item $item  Item object
       * @param \PicoFeed\Parser\Feed $feed  Feed object
       */
      public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
      {
          $language = XmlParser::getXPathResult($entry, './/@xml:lang');

          $item->setLanguage(XmlParser::getValue($language) ?: $feed->getLanguage());
      }

      /**
       * Find the item categories (the "term" attribute of each category element).
       *
       * @param SimpleXMLElement $entry Feed item
       * @param Item             $item  Item object
       * @param Feed             $feed  Feed object
       */
      public function findItemCategories(SimpleXMLElement $entry, Item $item, Feed $feed)
      {
          $categories = $this->queryAtomOrPlain($entry, 'atom:category/@term');

          $item->setCategoriesFromXml($categories);
      }

      /**
       * Run an XPath query written with the "atom:" prefix and, when it yields
       * nothing, retry the same query with every "atom:" prefix removed.
       *
       * The value produced by XmlParser::getXPathResult() is returned
       * untouched, so callers must be prepared for a falsy, non-array result.
       *
       * @param SimpleXMLElement $node  Context node
       * @param string           $query XPath query using the "atom:" prefix
       * @return mixed Result of the first query that produced a truthy value,
       *               otherwise the result of the un-prefixed query
       */
      private function queryAtomOrPlain(SimpleXMLElement $node, $query)
      {
          return XmlParser::getXPathResult($node, $query, $this->namespaces)
              ?: XmlParser::getXPathResult($node, str_replace('atom:', '', $query));
      }

      /**
       * Get the URL from a link tag.
       *
       * @param SimpleXMLElement $xml      XML tag
       * @param string           $rel      Link relationship: alternate, enclosure, related, self, via
       * @param bool             $fallback When true and no link matches $rel, use a link
       *                                   whose rel attribute is absent or empty
       * @return string The href of the selected link, or an empty string
       */
      private function getUrl(SimpleXMLElement $xml, $rel, $fallback = false)
      {
          $link = $this->findLink($xml, $rel);

          if (!$link && $fallback) {
              // A missing rel attribute casts to '', so this matches links without rel.
              $link = $this->findLink($xml, '');
          }

          return $link ? (string) $link['href'] : '';
      }

      /**
       * Get a link tag that match a relationship.
       *
       * @param SimpleXMLElement $xml XML tag
       * @param string           $rel Link relationship: alternate, enclosure, related, self, via
       * @return SimpleXMLElement|null The first link whose rel equals $rel, or null
       */
      private function findLink(SimpleXMLElement $xml, $rel)
      {
          $links = $this->queryAtomOrPlain($xml, 'atom:link');

          // The query result may be false/null rather than an array; do not iterate it.
          if (empty($links)) {
              return null;
          }

          foreach ($links as $link) {
              if ($rel === (string) $link['rel']) {
                  return $link;
              }
          }

          return null;
      }

      /**
       * Get the entry content.
       *
       * Order of preference: the serialized child elements of "content", the
       * text of "content", then the text of "summary".
       *
       * @param SimpleXMLElement $entry XML Entry
       * @return string
       */
      private function getContent(SimpleXMLElement $entry)
      {
          $nodes = $this->queryAtomOrPlain($entry, 'atom:content');

          // current() requires an array; the query result may be false/null.
          $content = empty($nodes) ? null : current($nodes);

          if (!empty($content) && count($content->children())) {
              $xml_string = '';

              foreach ($content->children() as $child) {
                  $xml_string .= $child->asXML();
              }

              return $xml_string;
          }

          if (trim((string) $content) !== '') {
              return (string) $content;
          }

          $summary = $this->queryAtomOrPlain($entry, 'atom:summary');

          return empty($summary) ? '' : (string) current($summary);
      }
  }