FeedsSimplePieParser.inc 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. <?php
  2. /**
  3. * @file
  4. * Contains FeedsSimplePieParser and related classes.
  5. */
  /**
   * Wraps a SimplePie_Enclosure so it can be used as a FeedsEnclosure.
   */
  class FeedsSimplePieEnclosure extends FeedsEnclosure {

    /**
     * The wrapped SimplePie enclosure object.
     */
    protected $simplepie_enclosure;

    /**
     * Serialized form of the wrapped enclosure.
     *
     * Written by __sleep() and read back by __wakeup(). This is the only
     * property that __sleep() lists for serialization.
     */
    private $_serialized_simplepie_enclosure;

    /**
     * Constructs the adapter around a SimplePie enclosure.
     *
     * @param SimplePie_Enclosure $enclosure
     *   The enclosure object to wrap.
     */
    public function __construct(SimplePie_Enclosure $enclosure) {
      $this->simplepie_enclosure = $enclosure;
    }

    /**
     * Serialization helper.
     *
     * The SimplePie enclosure is serialized by hand into a string property, and
     * only that string property is reported for serialization.
     *
     * @return array
     *   Names of the properties to serialize.
     */
    public function __sleep() {
      $serialized = serialize($this->simplepie_enclosure);
      $this->_serialized_simplepie_enclosure = $serialized;
      return array('_serialized_simplepie_enclosure');
    }

    /**
     * Unserialization helper.
     *
     * Makes sure the SimplePie class definitions are loaded before the stored
     * enclosure string is turned back into an object.
     */
    public function __wakeup() {
      feeds_include_simplepie();
      $serialized = $this->_serialized_simplepie_enclosure;
      $this->simplepie_enclosure = unserialize($serialized);
    }

    /**
     * Overrides parent::getValue().
     *
     * @return
     *   Whatever the wrapped enclosure's get_link() returns.
     */
    public function getValue() {
      $link = $this->simplepie_enclosure->get_link();
      return $link;
    }

    /**
     * Overrides parent::getMIMEType().
     *
     * @return
     *   Whatever the wrapped enclosure's get_real_type() returns.
     */
    public function getMIMEType() {
      $mime_type = $this->simplepie_enclosure->get_real_type();
      return $mime_type;
    }

  }
  /**
   * Feeds parser backed by the SimplePie library.
   *
   * Parses RSS and Atom feeds.
   */
  class FeedsSimplePieParser extends FeedsParser {

    /**
     * Implements FeedsParser::parse().
     *
     * Hands the fetched raw document to SimplePie and converts the feed and each
     * of its items into a FeedsParserResult.
     *
     * @param FeedsSource $source
     *   The source being imported. Not used by this implementation.
     * @param FeedsFetcherResult $fetcher_result
     *   Fetcher result whose raw data is passed to SimplePie.
     *
     * @return FeedsParserResult
     *   Result carrying the feed title, description and link, plus one array
     *   per feed item in $result->items.
     */
    public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
      feeds_include_simplepie();

      // Initialize SimplePie.
      $parser = new SimplePie();
      $parser->set_raw_data($fetcher_result->getRaw());
      $parser->set_stupidly_fast(TRUE);
      $parser->encode_instead_of_strip(FALSE);
      // @todo Is caching effective when we pass in raw data?
      $parser->enable_cache(TRUE);
      $parser->set_cache_location($this->cacheDirectory());
      $parser->init();

      // Construct the standard form of the parsed feed.
      $result = new FeedsParserResult();
      // Feeds without a title get one generated from their description.
      $title = $parser->get_title();
      if (!$title) {
        $title = $this->createTitle($parser->get_description());
      }
      $result->title = html_entity_decode($title);
      $result->description = $parser->get_description();
      // Cast so that a missing link is decoded as an empty string.
      $result->link = html_entity_decode((string) $parser->get_link());

      $items_num = $parser->get_item_quantity();
      for ($i = 0; $i < $items_num; $i++) {
        $item = array();
        $simplepie_item = $parser->get_item($i);

        // Items without a title get one generated from their content.
        $title = $simplepie_item->get_title();
        if (!$title) {
          $title = $this->createTitle($simplepie_item->get_content());
        }
        $item['title'] = html_entity_decode($title);
        $item['description'] = $simplepie_item->get_content();
        $item['url'] = html_entity_decode((string) $simplepie_item->get_link());

        // Use UNIX time. If no date is defined, fall back to REQUEST_TIME.
        $item['timestamp'] = $simplepie_item->get_date("U");
        if (empty($item['timestamp'])) {
          $item['timestamp'] = REQUEST_TIME;
        }

        // Use URL as GUID if there is no GUID.
        $item['guid'] = $simplepie_item->get_id();
        if (empty($item['guid'])) {
          $item['guid'] = $item['url'];
        }

        // Author details default to empty strings when no author is available.
        $author = $simplepie_item->get_author();
        $item['author_name'] = isset($author->name) ? html_entity_decode($author->name) : '';
        $item['author_link'] = isset($author->link) ? $author->link : '';
        $item['author_email'] = isset($author->email) ? $author->email : '';

        // Enclosures. The key is only set when the item has enclosures.
        $enclosures = $simplepie_item->get_enclosures();
        if (is_array($enclosures)) {
          foreach ($enclosures as $enclosure) {
            $item['enclosures'][] = new FeedsSimplePieEnclosure($enclosure);
          }
        }

        // Location. Only recorded when both coordinates are present.
        $latitude = $simplepie_item->get_latitude();
        $longitude = $simplepie_item->get_longitude();
        if (!is_null($latitude) && !is_null($longitude)) {
          $item['location_latitude'][] = $latitude;
          $item['location_longitude'][] = $longitude;
        }

        // Extract tags related to the item. $domains maps each category scheme
        // to the indexes in $tags of the categories that use it.
        $simplepie_tags = $simplepie_item->get_categories();
        $tags = array();
        $domains = array();
        // Only iterate a real array: calling count() on a non-countable value
        // is a warning on PHP 7.2+ and a TypeError on PHP 8.
        if (is_array($simplepie_tags)) {
          foreach ($simplepie_tags as $tag) {
            $tags[] = (string) $tag->term;
            $domain = (string) $tag->get_scheme();
            if (!empty($domain)) {
              if (!isset($domains[$domain])) {
                $domains[$domain] = array();
              }
              $domains[$domain][] = count($tags) - 1;
            }
          }
        }
        $item['domains'] = $domains;
        $item['tags'] = $tags;

        // Allow parsing to be extended.
        $this->parseExtensions($item, $simplepie_item);
        $item['raw'] = $simplepie_item->data;

        $result->items[] = $item;
      }

      // Release parser.
      unset($parser);
      return $result;
    }

    /**
     * Allow extension of FeedsSimplePie item parsing.
     *
     * Called once per item, after the standard keys are filled in and before
     * the 'raw' key is set. The default implementation does nothing.
     *
     * @param array $item
     *   The item array built so far, passed by reference.
     * @param $simplepie_item
     *   The SimplePie item the array was built from.
     */
    protected function parseExtensions(&$item, $simplepie_item) {}

    /**
     * Return mapping sources.
     *
     * @return array
     *   Mapping sources provided by this parser, keyed by the item array key
     *   they read from, followed by any additional ones from the parent class.
     */
    public function getMappingSources() {
      return array(
        'title' => array(
          'name' => t('Title'),
          'description' => t('Title of the feed item.'),
        ),
        'description' => array(
          'name' => t('Description'),
          'description' => t('Description of the feed item.'),
        ),
        'author_name' => array(
          'name' => t('Author name'),
          'description' => t('Name of the feed item\'s author.'),
        ),
        'author_link' => array(
          'name' => t('Author link'),
          'description' => t('Link to the feed item\'s author.'),
        ),
        'author_email' => array(
          'name' => t('Author email'),
          'description' => t('Email address of the feed item\'s author.'),
        ),
        'timestamp' => array(
          'name' => t('Published date'),
          'description' => t('Published date as UNIX time GMT of the feed item.'),
        ),
        'url' => array(
          'name' => t('Item URL (link)'),
          'description' => t('URL of the feed item.'),
        ),
        'guid' => array(
          'name' => t('Item GUID'),
          'description' => t('Global Unique Identifier of the feed item.'),
        ),
        'tags' => array(
          'name' => t('Categories'),
          'description' => t('An array of categories that have been assigned to the feed item.'),
        ),
        'domains' => array(
          'name' => t('Category domains'),
          'description' => t('Domains of the categories.'),
        ),
        'location_latitude' => array(
          'name' => t('Latitudes'),
          'description' => t('An array of latitudes assigned to the feed item.'),
        ),
        'location_longitude' => array(
          'name' => t('Longitudes'),
          'description' => t('An array of longitudes assigned to the feed item.'),
        ),
        'enclosures' => array(
          'name' => t('Enclosures'),
          'description' => t('An array of enclosures attached to the feed item.'),
        ),
      ) + parent::getMappingSources();
    }

    /**
     * Returns cache directory. Creates it if it doesn't exist.
     *
     * @return string
     *   The stream wrapper URI of the SimplePie cache directory.
     */
    protected function cacheDirectory() {
      $directory = 'public://simplepie';
      // file_prepare_directory() takes the path by reference, so it must be
      // given the variable that actually holds the path.
      file_prepare_directory($directory, FILE_CREATE_DIRECTORY | FILE_MODIFY_PERMISSIONS);
      return $directory;
    }

    /**
     * Generates a title from arbitrary text.
     *
     * @param $text
     *   Text to derive the title from. The default is FALSE and callers pass
     *   parser output unchecked, so it is cast to a string before use.
     *
     * @return string
     *   The first three words of the text joined by single spaces, or an empty
     *   string when the text contains no words.
     */
    protected function createTitle($text = FALSE) {
      // Split on whitespace and commas. Empty pieces are dropped so that leading
      // separators do not use up one of the three word slots.
      $words = preg_split("/[\s,]+/", (string) $text, -1, PREG_SPLIT_NO_EMPTY);
      // Use the first 3 words.
      return implode(' ', array_slice($words, 0, 3));
    }

  }
  216. }