FeedsSimplePieParser.inc 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. <?php
  2. /**
  3. * @file
  4. * Contains FeedsSimplePieParser and related classes.
  5. */
  /**
   * Wraps a SimplePie_Enclosure so it can be used as a FeedsEnclosure.
   *
   * All values are read from the wrapped SimplePie object on demand; the parent
   * constructor is not invoked.
   */
  class FeedsSimplePieEnclosure extends FeedsEnclosure {

    /**
     * The wrapped SimplePie enclosure that all getters delegate to.
     */
    protected $simplepie_enclosure;

    /**
     * Serialized form of the wrapped enclosure.
     *
     * Filled in by __sleep() and read back by __wakeup(). This is the only
     * property that is written when the object is serialized.
     */
    private $_serialized_simplepie_enclosure;

    /**
     * Constructs the adapter around a SimplePie enclosure.
     *
     * @param SimplePie_Enclosure $enclosure
     *   The enclosure to wrap.
     */
    public function __construct(SimplePie_Enclosure $enclosure) {
      $this->simplepie_enclosure = $enclosure;
    }

    /**
     * Serialization helper.
     *
     * The SimplePie enclosure is serialized separately into a string so that
     * __wakeup() can load the SimplePie classes before restoring it.
     *
     * @return array
     *   Names of the properties to serialize.
     */
    public function __sleep() {
      $serialized = serialize($this->simplepie_enclosure);
      $this->_serialized_simplepie_enclosure = $serialized;
      return array('_serialized_simplepie_enclosure');
    }

    /**
     * Unserialization helper.
     *
     * Includes the SimplePie library first so that its class definitions are
     * available, then restores the wrapped enclosure from its serialized form.
     */
    public function __wakeup() {
      feeds_include_simplepie();
      $restored = unserialize($this->_serialized_simplepie_enclosure);
      $this->simplepie_enclosure = $restored;
    }

    /**
     * Overrides parent::getValue().
     *
     * @return
     *   Whatever the wrapped enclosure's get_link() returns.
     */
    public function getValue() {
      $enclosure = $this->simplepie_enclosure;
      return $enclosure->get_link();
    }

    /**
     * Overrides parent::getMIMEType().
     *
     * @return
     *   Whatever the wrapped enclosure's get_real_type() returns.
     */
    public function getMIMEType() {
      $enclosure = $this->simplepie_enclosure;
      return $enclosure->get_real_type();
    }

  }
  /**
   * Feeds parser backed by the SimplePie library.
   *
   * Parses RSS and Atom feeds.
   */
  class FeedsSimplePieParser extends FeedsParser {

    /**
     * Implements FeedsParser::parse().
     *
     * Feeds the raw fetcher result to SimplePie and converts the parsed feed and
     * its items into a FeedsParserResult.
     *
     * @param FeedsSource $source
     *   The feed source being imported. Not used by this parser.
     * @param FeedsFetcherResult $fetcher_result
     *   The fetcher result whose raw content is parsed.
     *
     * @return FeedsParserResult
     *   The parsed feed: title, description, link and items.
     *
     * @throws Exception
     *   Re-throws anything thrown while parsing (for example from
     *   parseExtensions()) after the error reporting level has been restored.
     */
    public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
      feeds_include_simplepie();

      // Please be quiet SimplePie. Mask the two levels out instead of toggling
      // them with XOR, which would switch them ON when they are currently off.
      $level = error_reporting();
      error_reporting($level & ~(E_DEPRECATED | E_STRICT));

      try {
        // Initialize SimplePie.
        $parser = new SimplePie();
        $parser->set_raw_data($fetcher_result->getRaw());
        $parser->set_stupidly_fast(TRUE);
        $parser->encode_instead_of_strip(FALSE);
        // @todo Is caching effective when we pass in raw data?
        $parser->enable_cache(TRUE);
        $parser->set_cache_location($this->cacheDirectory());
        $parser->init();

        // Construct the standard form of the parsed feed. When the feed has no
        // title, one is generated from its description.
        $result = new FeedsParserResult();
        $feed_title = $parser->get_title();
        if (!$feed_title) {
          $feed_title = $this->createTitle($parser->get_description());
        }
        // String casts keep NULL out of html_entity_decode().
        $result->title = html_entity_decode((string) $feed_title);
        $result->description = $parser->get_description();
        $result->link = html_entity_decode((string) $parser->get_link());

        $items_num = $parser->get_item_quantity();
        for ($i = 0; $i < $items_num; $i++) {
          $result->items[] = $this->parseItem($parser->get_item($i));
        }

        // Release parser.
        unset($parser);
      }
      catch (Exception $e) {
        // Do not leave the lowered error reporting level behind when parsing
        // fails. Written as catch-and-rethrow so it does not depend on
        // "finally" being available.
        error_reporting($level);
        throw $e;
      }

      // Set error reporting back to its previous value.
      error_reporting($level);
      return $result;
    }

    /**
     * Converts a single SimplePie item into a Feeds item array.
     *
     * Keys are always added in the same order: title, description, url,
     * timestamp, guid, author_*, enclosures (only if present), location_* (only
     * if present), domains, tags, anything added by parseExtensions(), raw.
     *
     * @param $simplepie_item
     *   The SimplePie item object to convert.
     *
     * @return array
     *   The item in the form described by getMappingSources().
     */
    private function parseItem($simplepie_item) {
      $item = array();

      // Fall back to a title generated from the content when there is none.
      $title = $simplepie_item->get_title();
      if (!$title) {
        $title = $this->createTitle($simplepie_item->get_content());
      }
      $item['title'] = html_entity_decode((string) $title);
      $item['description'] = $simplepie_item->get_content();
      $item['url'] = html_entity_decode((string) $simplepie_item->get_link());

      // Use UNIX time. If no date is defined, fall back to REQUEST_TIME.
      $item['timestamp'] = $simplepie_item->get_date("U");
      if (empty($item['timestamp'])) {
        $item['timestamp'] = REQUEST_TIME;
      }

      // Use URL as GUID if there is no GUID.
      $item['guid'] = $simplepie_item->get_id();
      if (empty($item['guid'])) {
        $item['guid'] = $item['url'];
      }

      $author = $simplepie_item->get_author();
      $item['author_name'] = isset($author->name) ? html_entity_decode($author->name) : '';
      $item['author_link'] = isset($author->link) ? $author->link : '';
      $item['author_email'] = isset($author->email) ? $author->email : '';

      // Enclosures.
      $enclosures = $simplepie_item->get_enclosures();
      if (is_array($enclosures)) {
        foreach ($enclosures as $enclosure) {
          $item['enclosures'][] = new FeedsSimplePieEnclosure($enclosure);
        }
      }

      // Location: only recorded when both coordinates are present.
      $latitude = $simplepie_item->get_latitude();
      $longitude = $simplepie_item->get_longitude();
      if (!is_null($latitude) && !is_null($longitude)) {
        $item['location_latitude'][] = $latitude;
        $item['location_longitude'][] = $longitude;
      }

      // Extract tags related to the item. The result of get_categories() is
      // checked with is_array() before iterating, because calling count() on a
      // non-countable value is an error on current PHP versions.
      $tags = array();
      $domains = array();
      $simplepie_tags = $simplepie_item->get_categories();
      if (is_array($simplepie_tags)) {
        foreach ($simplepie_tags as $tag) {
          $tags[] = (string) $tag->term;
          $domain = (string) $tag->get_scheme();
          if (!empty($domain)) {
            // Each domain maps to the indexes (into $tags) of its categories.
            $domains[$domain][] = count($tags) - 1;
          }
        }
      }
      $item['domains'] = $domains;
      $item['tags'] = $tags;

      // Allow parsing to be extended.
      $this->parseExtensions($item, $simplepie_item);
      $item['raw'] = $simplepie_item->data;

      return $item;
    }

    /**
     * Allow extension of FeedsSimplePie item parsing.
     *
     * Called once per item after the standard keys have been populated and
     * before 'raw' is set. The default implementation does nothing.
     *
     * @param array $item
     *   The item being built, passed by reference.
     * @param $simplepie_item
     *   The SimplePie item object the item is built from.
     */
    protected function parseExtensions(&$item, $simplepie_item) {}

    /**
     * Return mapping sources.
     *
     * @return array
     *   Mapping sources provided by this parser, keyed by item key, merged with
     *   those of the parent class. Sources defined here win on key conflicts.
     */
    public function getMappingSources() {
      return array(
        'title' => array(
          'name' => t('Title'),
          'description' => t('Title of the feed item.'),
        ),
        'description' => array(
          'name' => t('Description'),
          'description' => t('Description of the feed item.'),
        ),
        'author_name' => array(
          'name' => t('Author name'),
          'description' => t('Name of the feed item\'s author.'),
        ),
        'author_link' => array(
          'name' => t('Author link'),
          'description' => t('Link to the feed item\'s author.'),
        ),
        'author_email' => array(
          'name' => t('Author email'),
          'description' => t('Email address of the feed item\'s author.'),
        ),
        'timestamp' => array(
          'name' => t('Published date'),
          'description' => t('Published date as UNIX time GMT of the feed item.'),
        ),
        'url' => array(
          'name' => t('Item URL (link)'),
          'description' => t('URL of the feed item.'),
        ),
        'guid' => array(
          'name' => t('Item GUID'),
          'description' => t('Global Unique Identifier of the feed item.'),
        ),
        'tags' => array(
          'name' => t('Categories'),
          'description' => t('An array of categories that have been assigned to the feed item.'),
        ),
        'domains' => array(
          'name' => t('Category domains'),
          'description' => t('Domains of the categories.'),
        ),
        'location_latitude' => array(
          'name' => t('Latitudes'),
          'description' => t('An array of latitudes assigned to the feed item.'),
        ),
        'location_longitude' => array(
          'name' => t('Longitudes'),
          'description' => t('An array of longitudes assigned to the feed item.'),
        ),
        'enclosures' => array(
          'name' => t('Enclosures'),
          'description' => t('An array of enclosures attached to the feed item.'),
        ),
      ) + parent::getMappingSources();
    }

    /**
     * Returns cache directory. Creates it if it doesn't exist.
     *
     * @return string
     *   The stream wrapper URI of the SimplePie cache directory.
     */
    protected function cacheDirectory() {
      $directory = 'public://simplepie';
      // file_prepare_directory() takes the path by reference; it must be given
      // the variable that holds the path, which is also the one returned.
      file_prepare_directory($directory, FILE_CREATE_DIRECTORY | FILE_MODIFY_PERMISSIONS);
      return $directory;
    }

    /**
     * Generates a title from arbitrary text.
     *
     * @param $text
     *   The text to derive the title from. FALSE or NULL is treated as an empty
     *   string.
     *
     * @return string
     *   The first three words of $text joined by single spaces; an empty string
     *   if $text contains no words.
     */
    protected function createTitle($text = FALSE) {
      // Split into words on whitespace and commas. Empty pieces are skipped so
      // that leading separators do not count as a word.
      $words = preg_split("/[\s,]+/", (string) $text, -1, PREG_SPLIT_NO_EMPTY);
      if ($words === FALSE) {
        return '';
      }
      // Use the first 3 words.
      return implode(' ', array_slice($words, 0, 3));
    }

  }
  221. }