FeedsHTTPFetcher.inc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. <?php
  2. /**
  3. * @file
  4. * Home of the FeedsHTTPFetcher and related classes.
  5. */
  6. feeds_include_library('PuSHSubscriber.inc', 'PuSHSubscriber');
  /**
   * Fetcher result that downloads its raw data over HTTP on first access.
   *
   * Instances are created by FeedsHTTPFetcher::fetch().
   */
  class FeedsHTTPFetcherResult extends FeedsFetcherResult {

    /**
     * URL the feed is downloaded from.
     *
     * @var string
     */
    protected $url;

    /**
     * Number of seconds to wait for the download.
     *
     * Stays NULL until setTimeout() is called.
     *
     * @var int
     */
    protected $timeout;

    /**
     * Whether SSL validation errors are ignored during the download.
     *
     * Stays NULL until setAcceptInvalidCert() is called.
     *
     * @var bool
     */
    protected $acceptInvalidCert;

    /**
     * Constructs the result for a given feed URL.
     *
     * @param string $url
     *   The URL to download from; defaults to NULL.
     */
    public function __construct($url = NULL) {
      $this->url = $url;
    }

    /**
     * Overrides FeedsFetcherResult::getRaw().
     *
     * Downloads the URL the first time it is called and keeps the response
     * body in $this->raw (not declared in this class; presumably inherited
     * from FeedsFetcherResult). Later calls reuse the stored value.
     *
     * @return mixed
     *   Whatever sanitizeRaw() returns for the stored response body.
     *
     * @throws Exception
     *   When the HTTP response code is not one of 200 through 206.
     */
    public function getRaw() {
      // Already downloaded: skip the HTTP request entirely.
      if (isset($this->raw)) {
        return $this->sanitizeRaw($this->raw);
      }

      feeds_include_library('http_request.inc', 'http_request');
      $response = http_request_get($this->url, NULL, NULL, $this->acceptInvalidCert, $this->timeout);

      $accepted_codes = array(200, 201, 202, 203, 204, 205, 206);
      if (!in_array($response->code, $accepted_codes)) {
        $replacements = array('@url' => $this->url, '!code' => $response->code);
        throw new Exception(t('Download of @url failed with code !code.', $replacements));
      }

      $this->raw = $response->data;
      return $this->sanitizeRaw($this->raw);
    }

    /**
     * Returns the download timeout.
     *
     * @return int
     *   The timeout in seconds, or NULL when none has been set.
     */
    public function getTimeout() {
      return $this->timeout;
    }

    /**
     * Sets the download timeout.
     *
     * @param int $timeout
     *   The timeout in seconds. The value is stored as given.
     */
    public function setTimeout($timeout) {
      $this->timeout = $timeout;
    }

    /**
     * Sets the accept invalid certificates option.
     *
     * @param bool $accept_invalid_cert
     *   Whether to accept invalid certificates. The value is cast to bool.
     */
    public function setAcceptInvalidCert($accept_invalid_cert) {
      $as_bool = (bool) $accept_invalid_cert;
      $this->acceptInvalidCert = $as_bool;
    }

  }
  /**
   * Fetches data via HTTP, optionally through PubSubHubbub notifications.
   */
  class FeedsHTTPFetcher extends FeedsFetcher {

    /**
     * Implements FeedsFetcher::fetch().
     *
     * When PubSubHubbub is enabled and the subscriber has received data, that
     * data is returned directly. Otherwise a FeedsHTTPFetcherResult is
     * returned that downloads the source URL when its raw data is requested.
     *
     * @param FeedsSource $source
     *   The source being fetched.
     *
     * @return FeedsFetcherResult
     *   The fetcher result.
     */
    public function fetch(FeedsSource $source) {
      $source_config = $source->getConfigFor($this);
      if ($this->config['use_pubsubhubbub'] && ($raw = $this->subscriber($source->feed_nid)->receive())) {
        return new FeedsFetcherResult($raw);
      }
      $fetcher_result = new FeedsHTTPFetcherResult($source_config['source']);
      // When request_timeout is empty, the global value is used.
      $fetcher_result->setTimeout($this->config['request_timeout']);
      $fetcher_result->setAcceptInvalidCert($this->config['accept_invalid_cert']);
      return $fetcher_result;
    }

    /**
     * Clears the HTTP request cache entry for the source URL.
     *
     * @param FeedsSource $source
     *   The source whose configured URL is passed to
     *   http_request_clear_cache().
     */
    public function clear(FeedsSource $source) {
      $source_config = $source->getConfigFor($this);
      $url = $source_config['source'];
      feeds_include_library('http_request.inc', 'http_request');
      http_request_clear_cache($url);
    }

    /**
     * Implements FeedsFetcher::request().
     *
     * Handles an incoming request: either a subscription verification (when
     * 'hub_challenge' is present in the query string) or a notification, which
     * triggers an import of the existing source. Always ends the request via
     * drupal_exit().
     *
     * @param int $feed_nid
     *   The feed node id the request is for; 0 by default.
     */
    public function request($feed_nid = 0) {
      feeds_dbg($_GET);
      @feeds_dbg(file_get_contents('php://input'));
      // A subscription verification has been sent, verify.
      if (isset($_GET['hub_challenge'])) {
        $this->subscriber($feed_nid)->verifyRequest();
      }
      // No verification challenge was sent, so we are being notified.
      else {
        try {
          feeds_source($this->id, $feed_nid)->existing()->import();
        }
        catch (Exception $e) {
          // Record the failure instead of discarding it silently.
          watchdog_exception('FeedsHTTPFetcher', $e);
          // In case of an error, respond with a 503 Service (temporary)
          // unavailable. FALSE is what the former NULL $replace argument was
          // coerced to; passing NULL to it is deprecated as of PHP 8.1.
          header('HTTP/1.1 503 "Service Unavailable"', FALSE, 503);
          drupal_exit();
        }
      }
      // Will generate the default 200 response.
      header('HTTP/1.1 200 "OK"', FALSE, 200);
      drupal_exit();
    }

    /**
     * Override parent::configDefaults().
     *
     * @return array
     *   Default importer-level configuration for this fetcher.
     */
    public function configDefaults() {
      return array(
        'auto_detect_feeds' => FALSE,
        'use_pubsubhubbub' => FALSE,
        'designated_hub' => '',
        'request_timeout' => NULL,
        'auto_scheme' => 'http',
        'accept_invalid_cert' => FALSE,
      );
    }

    /**
     * Override parent::configForm().
     *
     * @param array $form_state
     *   The form state; not used by this implementation.
     *
     * @return array
     *   Form elements for the importer-level fetcher settings.
     */
    public function configForm(&$form_state) {
      $form = array();
      $form['auto_detect_feeds'] = array(
        '#type' => 'checkbox',
        '#title' => t('Auto detect feeds'),
        '#description' => t('If the supplied URL does not point to a feed but an HTML document, attempt to extract a feed URL from the document.'),
        '#default_value' => $this->config['auto_detect_feeds'],
      );
      $form['use_pubsubhubbub'] = array(
        '#type' => 'checkbox',
        '#title' => t('Use PubSubHubbub'),
        '#description' => t('Attempt to use a <a href="http://en.wikipedia.org/wiki/PubSubHubbub">PubSubHubbub</a> subscription if available.'),
        '#default_value' => $this->config['use_pubsubhubbub'],
      );
      $form['advanced'] = array(
        '#title' => t('Advanced settings'),
        '#type' => 'fieldset',
        '#collapsible' => TRUE,
        '#collapsed' => TRUE,
      );
      $form['advanced']['auto_scheme'] = array(
        '#type' => 'textfield',
        '#title' => t('Automatically add scheme'),
        '#description' => t('If the supplied URL does not contain the scheme, use this one automatically. Keep empty to force the user to input the scheme.'),
        '#default_value' => $this->config['auto_scheme'],
      );
      $form['advanced']['designated_hub'] = array(
        '#type' => 'textfield',
        '#title' => t('Designated hub'),
        '#description' => t('Enter the URL of a designated PubSubHubbub hub (e. g. superfeedr.com). If given, this hub will be used instead of the hub specified in the actual feed.'),
        '#default_value' => $this->config['designated_hub'],
        // Only shown while the PubSubHubbub checkbox is ticked.
        '#states' => array(
          'visible' => array(':input[name="use_pubsubhubbub"]' => array('checked' => TRUE)),
        ),
      );
      // Per importer override of global http request timeout setting.
      // The line breaks use <br />; the former </br> is not valid markup.
      $form['advanced']['request_timeout'] = array(
        '#type' => 'textfield',
        '#title' => t('Request timeout'),
        '#description' => t('Timeout in seconds to wait for an HTTP get request to finish.<br /><b>Note:</b> this setting will override the global setting.<br />When left empty, the global value is used.'),
        '#default_value' => $this->config['request_timeout'],
        '#element_validate' => array('element_validate_integer_positive'),
        '#maxlength' => 3,
        '#size' => 30,
      );
      $form['advanced']['accept_invalid_cert'] = array(
        '#type' => 'checkbox',
        '#title' => t('Accept invalid SSL certificates'),
        '#description' => t('<strong>IMPORTANT:</strong> This setting will force cURL to completely ignore all SSL errors. This is a <strong>major security risk</strong> and should only be used during development.'),
        '#default_value' => $this->config['accept_invalid_cert'],
      );
      return $form;
    }

    /**
     * Expose source form.
     *
     * @param array $source_config
     *   The current source configuration; its 'source' key, when set, is used
     *   as the default URL.
     *
     * @return array
     *   Form with a single required URL text field.
     */
    public function sourceForm($source_config) {
      $form = array();
      $form['source'] = array(
        '#type' => 'textfield',
        '#title' => t('URL'),
        '#description' => t('Enter a feed URL.'),
        '#default_value' => isset($source_config['source']) ? $source_config['source'] : '',
        '#maxlength' => NULL,
        '#required' => TRUE,
      );
      return $form;
    }

    /**
     * Override parent::sourceFormValidate().
     *
     * Trims the URL, prepends the configured scheme when the URL has none,
     * validates the result and, if auto detection is enabled, replaces the
     * URL with a feed URL discovered at that address.
     *
     * @param array $values
     *   The submitted source form values; 'source' is modified in place.
     */
    public function sourceFormValidate(&$values) {
      $values['source'] = trim($values['source']);
      // Keep a copy for error messages.
      $original_url = $values['source'];
      $parts = parse_url($values['source']);
      if (empty($parts['scheme']) && $this->config['auto_scheme']) {
        $values['source'] = $this->config['auto_scheme'] . '://' . $values['source'];
      }
      if (!feeds_valid_url($values['source'], TRUE)) {
        $form_key = 'feeds][' . get_class($this) . '][source';
        form_set_error($form_key, t('The URL %source is invalid.', array('%source' => $original_url)));
      }
      elseif ($this->config['auto_detect_feeds']) {
        feeds_include_library('http_request.inc', 'http_request');
        $url = http_request_get_common_syndication($values['source'], array(
          'accept_invalid_cert' => $this->config['accept_invalid_cert'],
        ));
        if ($url) {
          $values['source'] = $url;
        }
      }
    }

    /**
     * Override sourceSave() - subscribe to hub.
     *
     * @param FeedsSource $source
     *   The source being saved.
     */
    public function sourceSave(FeedsSource $source) {
      if ($this->config['use_pubsubhubbub']) {
        // If this is a feeds node we want to delay the subscription to
        // feeds_exit() to avoid transaction race conditions.
        if ($source->feed_nid) {
          $job = array('fetcher' => $this, 'source' => $source);
          feeds_set_subscription_job($job);
        }
        else {
          $this->subscribe($source);
        }
      }
    }

    /**
     * Override sourceDelete() - unsubscribe from hub.
     *
     * @param FeedsSource $source
     *   The source being deleted.
     */
    public function sourceDelete(FeedsSource $source) {
      if ($this->config['use_pubsubhubbub']) {
        // If we're in a feed node, queue the unsubscribe,
        // else process immediately.
        if ($source->feed_nid) {
          $job = array(
            'type' => $source->id,
            'id' => $source->feed_nid,
            'period' => 0,
            'periodic' => FALSE,
          );
          JobScheduler::get('feeds_push_unsubscribe')->set($job);
        }
        else {
          $this->unsubscribe($source);
        }
      }
    }

    /**
     * Implement FeedsFetcher::subscribe() - subscribe to hub.
     *
     * @param FeedsSource $source
     *   The source whose URL is the subscription topic.
     */
    public function subscribe(FeedsSource $source) {
      $source_config = $source->getConfigFor($this);
      $subscriber = $this->subscriber($source->feed_nid);
      $callback_url = url($this->path($source->feed_nid), array('absolute' => TRUE));
      // An empty hub is passed unless the designated hub passes valid_url().
      $hub = valid_url($this->config['designated_hub']) ? $this->config['designated_hub'] : '';
      $subscriber->subscribe($source_config['source'], $callback_url, $hub);
    }

    /**
     * Implement FeedsFetcher::unsubscribe() - unsubscribe from hub.
     *
     * @param FeedsSource $source
     *   The source whose URL is the subscription topic.
     */
    public function unsubscribe(FeedsSource $source) {
      $source_config = $source->getConfigFor($this);
      $subscriber = $this->subscriber($source->feed_nid);
      $callback_url = url($this->path($source->feed_nid), array('absolute' => TRUE));
      $subscriber->unsubscribe($source_config['source'], $callback_url);
    }

    /**
     * Implement FeedsFetcher::importPeriod().
     *
     * @param FeedsSource $source
     *   The source to check for an active subscription.
     *
     * @return int|null
     *   259200 (three days in seconds) when the subscriber reports an active
     *   subscription; nothing (NULL) otherwise.
     */
    public function importPeriod(FeedsSource $source) {
      if ($this->subscriber($source->feed_nid)->subscribed()) {
        // Delay for three days if there is a successful subscription.
        return 259200;
      }
    }

    /**
     * Convenience method for instantiating a subscriber object.
     *
     * @param int $subscriber_id
     *   The subscriber id; callers in this class pass the feed node id.
     *
     * @return PuSHSubscriber
     *   The subscriber for this importer id and subscriber id.
     */
    protected function subscriber($subscriber_id) {
      // The class name is spelled PuSHSubscriber, matching the included
      // library; PHP resolved the old "PushSubscriber" spelling only because
      // class names are case-insensitive.
      return PuSHSubscriber::instance($this->id, $subscriber_id, 'PuSHSubscription', PuSHEnvironment::instance());
    }

  }
  /**
   * Implement a PuSHSubscriptionInterface.
   *
   * Subscriptions are stored in the {feeds_push_subscriptions} table, keyed by
   * domain and subscriber id.
   */
  class PuSHSubscription implements PuSHSubscriptionInterface {
    public $domain;
    public $subscriber_id;
    public $hub;
    public $topic;
    public $status;
    public $secret;
    public $post_fields;
    public $timestamp;

    /**
     * Load a subscription.
     *
     * @param string $domain
     *   The subscription domain.
     * @param int $subscriber_id
     *   The subscriber id within that domain.
     *
     * @return PuSHSubscription|null
     *   The stored subscription, or NULL when no matching row exists.
     */
    public static function load($domain, $subscriber_id) {
      $row = db_query("SELECT * FROM {feeds_push_subscriptions} WHERE domain = :domain AND subscriber_id = :sid", array(':domain' => $domain, ':sid' => $subscriber_id))->fetchAssoc();
      if (!$row) {
        return NULL;
      }
      // post_fields is kept serialized in the database row.
      $row['post_fields'] = unserialize($row['post_fields']);
      return new PuSHSubscription($row['domain'], $row['subscriber_id'], $row['hub'], $row['topic'], $row['secret'], $row['status'], $row['post_fields'], $row['timestamp']);
    }

    /**
     * Create a subscription.
     *
     * @param string $domain
     *   The subscription domain.
     * @param int $subscriber_id
     *   The subscriber id within that domain.
     * @param string $hub
     *   The hub value stored with the subscription.
     * @param string $topic
     *   The topic value stored with the subscription.
     * @param string $secret
     *   The secret stored with the subscription.
     * @param string $status
     *   The subscription status; empty string by default.
     * @param mixed $post_fields
     *   The post fields stored with the subscription; empty string by default.
     * @param int|null $timestamp
     *   The stored timestamp. load() has always passed this eighth argument,
     *   but the constructor used to drop it; it is optional so existing
     *   seven-argument callers keep working.
     */
    public function __construct($domain, $subscriber_id, $hub, $topic, $secret, $status = '', $post_fields = '', $timestamp = NULL) {
      $this->domain = $domain;
      $this->subscriber_id = $subscriber_id;
      $this->hub = $hub;
      $this->topic = $topic;
      $this->status = $status;
      $this->secret = $secret;
      $this->post_fields = $post_fields;
      $this->timestamp = $timestamp;
    }

    /**
     * Save a subscription.
     *
     * Stamps the subscription with the current time, removes any existing row
     * for the same domain and subscriber id, then writes a new record.
     */
    public function save() {
      $this->timestamp = time();
      // delete() takes no parameters; it reads domain and subscriber_id from
      // this object.
      $this->delete();
      drupal_write_record('feeds_push_subscriptions', $this);
    }

    /**
     * Delete a subscription.
     *
     * Removes the rows matching this object's domain and subscriber id.
     */
    public function delete() {
      db_delete('feeds_push_subscriptions')
        ->condition('domain', $this->domain)
        ->condition('subscriber_id', $this->subscriber_id)
        ->execute();
    }

  }
  /**
   * Provide environmental functions to the PuSHSubscriber library.
   */
  class PuSHEnvironment implements PuSHSubscriberEnvironmentInterface {

    /**
     * Singleton.
     *
     * @return PuSHEnvironment
     *   The shared instance, created on the first call.
     */
    public static function instance() {
      static $env;
      if (empty($env)) {
        $env = new PuSHEnvironment();
      }
      return $env;
    }

    /**
     * Implements PuSHSubscriberEnvironmentInterface::msg().
     *
     * @param string $msg
     *   The message; it is passed through check_plain() before display.
     * @param string $level
     *   The message type handed to drupal_set_message(); 'status' by default.
     */
    public function msg($msg, $level = 'status') {
      drupal_set_message(check_plain($msg), $level);
    }

    /**
     * Implements PuSHSubscriberEnvironmentInterface::log().
     *
     * @param string $msg
     *   The message to log.
     * @param string $level
     *   'error' or 'warning'; any other value is logged as a notice.
     */
    public function log($msg, $level = 'status') {
      switch ($level) {
        case 'error':
          $severity = WATCHDOG_ERROR;
          break;

        case 'warning':
          $severity = WATCHDOG_WARNING;
          break;

        default:
          $severity = WATCHDOG_NOTICE;
          break;
      }
      feeds_dbg($msg);
      // $msg is a dynamic string that cannot be translated, so NULL is passed
      // for the variables instead of an empty array.
      watchdog('FeedsHTTPFetcher', $msg, NULL, $severity);
    }

  }