FeedsImporter.inc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. <?php
  2. /**
  3. * @file
  4. * FeedsImporter class and related.
  5. */
  6. /**
  7. * A FeedsImporter object describes how an external source should be fetched,
  8. * parsed and processed. Feeds can manage an arbitrary number of importers.
  9. *
  10. * A FeedsImporter holds a pointer to a FeedsFetcher, a FeedsParser and a
  11. * FeedsProcessor plugin. It further contains the configuration for itself and
  12. * each of the three plugins.
  13. *
  14. * Its most important responsibilities are configuration management, interfacing
  15. * with the job scheduler and expiring of all items produced by this
  16. * importer.
  17. *
  18. * When a FeedsImporter is instantiated, it loads its configuration. Then it
  19. * instantiates one fetcher, one parser and one processor plugin depending on
  20. * the configuration information. After instantiating them, it sets them to
  21. * the configuration information it holds for them.
  22. */
  23. class FeedsImporter extends FeedsConfigurable {
  24. // Every feed has a fetcher, a parser and a processor.
  25. // These variable names match the possible return values of
  26. // FeedsPlugin::typeOf().
  27. protected $fetcher, $parser, $processor;
  28. // This array defines the variable names of the plugins above.
  29. protected $plugin_types = array('fetcher', 'parser', 'processor');
  30. /**
  31. * Instantiate class variables, initialize and configure
  32. * plugins.
  33. */
  34. protected function __construct($id) {
  35. parent::__construct($id);
  36. // Try to load information from database.
  37. $this->load();
  38. // Instantiate fetcher, parser and processor, set their configuration if
  39. // stored info is available.
  40. foreach ($this->plugin_types as $type) {
  41. $plugin = feeds_plugin($this->config[$type]['plugin_key'], $this->id);
  42. if (isset($this->config[$type]['config'])) {
  43. $plugin->setConfig($this->config[$type]['config']);
  44. }
  45. $this->$type = $plugin;
  46. }
  47. }
  48. /**
  49. * Remove items older than $time.
  50. *
  51. * @param $time
  52. * All items older than REQUEST_TIME - $time will be deleted. If not
  53. * given, internal processor settings will be used.
  54. *
  55. * @return
  56. * FEEDS_BATCH_COMPLETE if the expiry process finished. A decimal between
  57. * 0.0 and 0.9 periodic if expiry is still in progress.
  58. *
  59. * @throws
  60. * Throws Exception if an error occurs when expiring items.
  61. */
  62. public function expire($time = NULL) {
  63. return $this->processor->expire($time);
  64. }
  65. /**
  66. * Schedule all periodic tasks for this importer.
  67. */
  68. public function schedule() {
  69. $this->scheduleExpire();
  70. }
  71. /**
  72. * Schedule expiry of items.
  73. */
  74. public function scheduleExpire() {
  75. $job = array(
  76. 'type' => $this->id,
  77. 'period' => 0,
  78. 'periodic' => TRUE,
  79. );
  80. if (FEEDS_EXPIRE_NEVER != $this->processor->expiryTime()) {
  81. $job['period'] = 3600;
  82. JobScheduler::get('feeds_importer_expire')->set($job);
  83. }
  84. else {
  85. JobScheduler::get('feeds_importer_expire')->remove($job);
  86. }
  87. }
  88. /**
  89. * Report how many items *should* be created on one page load by this
  90. * importer.
  91. *
  92. * Note:
  93. *
  94. * It depends on whether parser implements batching if this limit is actually
  95. * respected. Further, if no limit is reported it doesn't mean that the
  96. * number of items that can be created on one page load is actually without
  97. * limit.
  98. *
  99. * @return
  100. * A positive number defining the number of items that can be created on
  101. * one page load. 0 if this number is unlimited.
  102. */
  103. public function getLimit() {
  104. return $this->processor->getLimit();
  105. }
  106. /**
  107. * Save configuration.
  108. */
  109. public function save() {
  110. $save = new stdClass();
  111. $save->id = $this->id;
  112. $save->config = $this->getConfig();
  113. if ($config = db_query("SELECT config FROM {feeds_importer} WHERE id = :id", array(':id' => $this->id))->fetchField()) {
  114. drupal_write_record('feeds_importer', $save, 'id');
  115. // Only rebuild menu if content_type has changed. Don't worry about
  116. // rebuilding menus when creating a new importer since it will default
  117. // to the standalone page.
  118. $config = unserialize($config);
  119. if ($config['content_type'] != $save->config['content_type']) {
  120. variable_set('menu_rebuild_needed', TRUE);
  121. }
  122. }
  123. else {
  124. drupal_write_record('feeds_importer', $save);
  125. }
  126. }
  127. /**
  128. * Load configuration and unpack.
  129. */
  130. public function load() {
  131. ctools_include('export');
  132. if ($config = ctools_export_load_object('feeds_importer', 'conditions', array('id' => $this->id))) {
  133. $config = array_shift($config);
  134. $this->export_type = $config->export_type;
  135. $this->disabled = isset($config->disabled) ? $config->disabled : FALSE;
  136. $this->config = $config->config;
  137. return TRUE;
  138. }
  139. return FALSE;
  140. }
  141. /**
  142. * Delete configuration. Removes configuration information
  143. * from database, does not delete configuration itself.
  144. */
  145. public function delete() {
  146. db_delete('feeds_importer')
  147. ->condition('id', $this->id)
  148. ->execute();
  149. $job = array(
  150. 'type' => $this->id,
  151. 'id' => 0,
  152. );
  153. if ($this->export_type & EXPORT_IN_CODE) {
  154. feeds_reschedule($this->id);
  155. }
  156. else {
  157. JobScheduler::get('feeds_importer_expire')->remove($job);
  158. }
  159. }
  160. /**
  161. * Set plugin.
  162. *
  163. * @param $plugin_key
  164. * A fetcher, parser or processor plugin.
  165. *
  166. * @todo Error handling, handle setting to the same plugin.
  167. */
  168. public function setPlugin($plugin_key) {
  169. // $plugin_type can be either 'fetcher', 'parser' or 'processor'
  170. if ($plugin_type = FeedsPlugin::typeOf($plugin_key)) {
  171. if ($plugin = feeds_plugin($plugin_key, $this->id)) {
  172. // Unset existing plugin, switch to new plugin.
  173. unset($this->$plugin_type);
  174. $this->$plugin_type = $plugin;
  175. // Set configuration information, blow away any previous information on
  176. // this spot.
  177. $this->config[$plugin_type] = array('plugin_key' => $plugin_key);
  178. }
  179. }
  180. }
  181. /**
  182. * Copy a FeedsImporter configuration into this importer.
  183. *
  184. * @param FeedsImporter $importer
  185. * The feeds importer object to copy from.
  186. */
  187. public function copy(FeedsConfigurable $configurable) {
  188. parent::copy($configurable);
  189. if ($configurable instanceof FeedsImporter) {
  190. // Instantiate new fetcher, parser and processor and initialize their
  191. // configurations.
  192. foreach ($this->plugin_types as $plugin_type) {
  193. $this->setPlugin($configurable->config[$plugin_type]['plugin_key']);
  194. $this->$plugin_type->setConfig($configurable->config[$plugin_type]['config']);
  195. }
  196. }
  197. }
  198. /**
  199. * Get configuration of this feed.
  200. */
  201. public function getConfig() {
  202. foreach (array('fetcher', 'parser', 'processor') as $type) {
  203. $this->config[$type]['config'] = $this->$type->getConfig();
  204. }
  205. return parent::getConfig();
  206. }
  207. /**
  208. * Return defaults for feed configuration.
  209. */
  210. public function configDefaults() {
  211. return array(
  212. 'name' => '',
  213. 'description' => '',
  214. 'fetcher' => array(
  215. 'plugin_key' => 'FeedsHTTPFetcher',
  216. ),
  217. 'parser' => array(
  218. 'plugin_key' => 'FeedsSyndicationParser',
  219. ),
  220. 'processor' => array(
  221. 'plugin_key' => 'FeedsNodeProcessor',
  222. ),
  223. 'content_type' => '',
  224. 'update' => 0,
  225. 'import_period' => 1800, // Refresh every 30 minutes by default.
  226. 'expire_period' => 3600, // Expire every hour by default, this is a hidden setting.
  227. 'import_on_create' => TRUE, // Import on submission.
  228. 'process_in_background' => FALSE,
  229. );
  230. }
  231. /**
  232. * Override parent::configForm().
  233. */
  234. public function configForm(&$form_state) {
  235. $config = $this->getConfig();
  236. $form = array();
  237. $form['name'] = array(
  238. '#type' => 'textfield',
  239. '#title' => t('Name'),
  240. '#description' => t('A human readable name of this importer.'),
  241. '#default_value' => $config['name'],
  242. '#required' => TRUE,
  243. );
  244. $form['description'] = array(
  245. '#type' => 'textfield',
  246. '#title' => t('Description'),
  247. '#description' => t('A description of this importer.'),
  248. '#default_value' => $config['description'],
  249. );
  250. $node_types = node_type_get_names();
  251. array_walk($node_types, 'check_plain');
  252. $form['content_type'] = array(
  253. '#type' => 'select',
  254. '#title' => t('Attach to content type'),
  255. '#description' => t('If "Use standalone form" is selected a source is imported by using a form under !import_form.
  256. If a content type is selected a source is imported by creating a node of that content type.',
  257. array('!import_form' => l(url('import', array('absolute' => TRUE)), 'import', array('attributes' => array('target' => '_new'))))),
  258. '#options' => array('' => t('Use standalone form')) + $node_types,
  259. '#default_value' => $config['content_type'],
  260. );
  261. $cron_required = ' ' . l(t('Requires cron to be configured.'), 'http://drupal.org/cron', array('attributes' => array('target' => '_new')));
  262. $period = drupal_map_assoc(array(900, 1800, 3600, 10800, 21600, 43200, 86400, 259200, 604800, 2419200), 'format_interval');
  263. foreach ($period as &$p) {
  264. $p = t('Every !p', array('!p' => $p));
  265. }
  266. $period = array(
  267. FEEDS_SCHEDULE_NEVER => t('Off'),
  268. 0 => t('As often as possible'),
  269. ) + $period;
  270. $form['import_period'] = array(
  271. '#type' => 'select',
  272. '#title' => t('Periodic import'),
  273. '#options' => $period,
  274. '#description' => t('Choose how often a source should be imported periodically.') . $cron_required,
  275. '#default_value' => $config['import_period'],
  276. );
  277. $form['import_on_create'] = array(
  278. '#type' => 'checkbox',
  279. '#title' => t('Import on submission'),
  280. '#description' => t('Check if import should be started at the moment a standalone form or node form is submitted.'),
  281. '#default_value' => $config['import_on_create'],
  282. );
  283. $form['process_in_background'] = array(
  284. '#type' => 'checkbox',
  285. '#title' => t('Process in background'),
  286. '#description' => t('For very large imports. If checked, import and delete tasks started from the web UI will be handled by a cron task in the background rather than by the browser. This does not affect periodic imports, they are handled by a cron task in any case.') . $cron_required,
  287. '#default_value' => $config['process_in_background'],
  288. );
  289. return $form;
  290. }
  291. /**
  292. * Reschedule if import period changes.
  293. */
  294. public function configFormSubmit(&$values) {
  295. if ($this->config['import_period'] != $values['import_period']) {
  296. feeds_reschedule($this->id);
  297. }
  298. parent::configFormSubmit($values);
  299. }
  300. }
  301. /**
  302. * Helper, see FeedsDataProcessor class.
  303. */
  304. function feeds_format_expire($timestamp) {
  305. if ($timestamp == FEEDS_EXPIRE_NEVER) {
  306. return t('Never');
  307. }
  308. return t('after !time', array('!time' => format_interval($timestamp)));
  309. }