FeedsFileFetcher.inc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. <?php
  2. /**
  3. * @file
  4. * Home of the FeedsFileFetcher and related classes.
  5. */
  /**
   * Definition of the import batch object created on the fetching stage by
   * FeedsFileFetcher.
   *
   * Wraps a single file: the raw content is read from disk on demand instead of
   * being handed to the parent constructor.
   */
  class FeedsFileFetcherResult extends FeedsFetcherResult {

    /**
     * Constructor.
     *
     * @param string $file_path
     *   Path or stream wrapper URI of the file this result represents.
     */
    public function __construct($file_path) {
      // The parent receives an empty raw string; getRaw() below reads the file.
      parent::__construct('');
      // NOTE(review): $file_path is presumably declared on FeedsFetcherResult -
      // confirm, otherwise this creates a dynamic property.
      $this->file_path = $file_path;
    }

    /**
     * Overrides parent::getRaw().
     *
     * @return string
     *   The file contents after being passed through the inherited
     *   sanitizeRaw().
     *
     * @throws Exception
     *   If the file is not readable or reading it fails.
     */
    public function getRaw() {
      $this->assertFileIsReadable();
      $raw = file_get_contents($this->file_path);
      // file_get_contents() signals failure with FALSE; do not pass that on as
      // if it were content.
      if ($raw === FALSE) {
        throw new Exception(t('File @filepath is not accessible.', array('@filepath' => $this->file_path)));
      }
      return $this->sanitizeRaw($raw);
    }

    /**
     * Overrides parent::getFilePath().
     *
     * @return string
     *   The value returned by the inherited sanitizeFile() for this file.
     *
     * @throws Exception
     *   If the file is not readable.
     */
    public function getFilePath() {
      $this->assertFileIsReadable();
      return $this->sanitizeFile($this->file_path);
    }

    /**
     * Ensures that the wrapped file can be read.
     *
     * Shared by getRaw() and getFilePath() so both fail the same way.
     *
     * @throws Exception
     *   If is_readable() reports that the file is not readable.
     */
    private function assertFileIsReadable() {
      if (!is_readable($this->file_path)) {
        throw new Exception(t('File @filepath is not accessible.', array('@filepath' => $this->file_path)));
      }
    }

  }
  /**
   * Fetches data from a file or from a directory of files.
   *
   * The source is either a file uploaded through the source form or, in
   * "direct" mode, a stream wrapper URI entered by the user that points to a
   * file or a directory.
   */
  class FeedsFileFetcher extends FeedsFetcher {

    /**
     * Implements FeedsFetcher::fetch().
     *
     * @param FeedsSource $source
     *   The source being imported.
     *
     * @return FeedsFileFetcherResult
     *   A result for the configured file, or for the next not yet returned file
     *   when the configured source is a directory.
     *
     * @throws Exception
     *   If the source is not a file and no file is left to return for it.
     */
    public function fetch(FeedsSource $source) {
      $source_config = $source->getConfigFor($this);

      // Just return a file fetcher result if this is a file.
      if (is_file($source_config['source'])) {
        return new FeedsFileFetcherResult($source_config['source']);
      }

      // Batch if this is a directory: the file list is collected once and kept
      // on the fetch state, then one file is handed out per call.
      $state = $source->state(FEEDS_FETCH);
      if (!isset($state->files)) {
        $state->files = $this->listFiles($source_config['source']);
        $state->total = count($state->files);
      }
      if (count($state->files)) {
        $file = array_shift($state->files);
        // Report how many of the listed files have been handed out so far.
        $state->progress($state->total, $state->total - count($state->files));
        return new FeedsFileFetcherResult($file);
      }

      throw new Exception(t('Resource is not a file or it is an empty directory: %source', array('%source' => $source_config['source'])));
    }

    /**
     * Returns an array of files in a directory.
     *
     * Only files whose name ends in one of the configured allowed extensions
     * are returned. If no extension is configured, nothing is returned.
     *
     * @param string $dir
     *   A stream wrapper URI that is a directory.
     *
     * @return array
     *   An array of stream wrapper URIs pointing to files. The array is empty if
     *   no files could be found. Never contains directories.
     */
    protected function listFiles($dir) {
      // Separate out string into array of extensions. Make sure it is regex
      // safe, including the "/" delimiter used for the pattern below.
      $config = $this->getConfig();
      $extensions = array();
      foreach (explode(' ', $config['allowed_extensions']) as $extension) {
        if ($extension !== '') {
          $extensions[] = preg_quote($extension, '/');
        }
      }

      // Without extensions the pattern would degenerate to an empty group.
      if (!$extensions) {
        return array();
      }

      $regex = '/\.(' . implode('|', $extensions) . ')$/';
      $files = array();
      foreach (file_scan_directory($dir, $regex) as $file) {
        $files[] = $file->uri;
      }
      return $files;
    }

    /**
     * Source form.
     *
     * Builds an upload field in upload mode, or a text field for a file or
     * directory URI in direct mode.
     *
     * @param array $source_config
     *   The current source configuration; 'fid' and 'source' are read from it.
     *
     * @return array
     *   The form array.
     */
    public function sourceForm($source_config) {
      $form = array();
      $form['fid'] = array(
        '#type' => 'value',
        '#value' => empty($source_config['fid']) ? 0 : $source_config['fid'],
      );
      if (empty($this->config['direct'])) {
        // Upload mode: the source URI is carried along as a value and gets
        // replaced in sourceFormValidate() when a new file is uploaded.
        $form['source'] = array(
          '#type' => 'value',
          '#value' => empty($source_config['source']) ? '' : $source_config['source'],
        );
        $form['upload'] = array(
          '#type' => 'file',
          '#title' => t('File'),
          '#description' => empty($source_config['source']) ? t('Select a file from your local system.') : t('Select a different file from your local system.'),
          '#theme_wrappers' => array('feeds_upload'),
          '#file_info' => empty($source_config['fid']) ? NULL : file_load($source_config['fid']),
          '#size' => 10,
        );
      }
      else {
        $form['source'] = array(
          '#type' => 'textfield',
          '#title' => t('File'),
          '#description' => t('Specify a path to a file or a directory. Prefix the path with a scheme. Available schemes: @schemes.', array('@schemes' => implode(', ', $this->config['allowed_schemes']))),
          '#default_value' => empty($source_config['source']) ? '' : $source_config['source'],
        );
      }
      return $form;
    }

    /**
     * Overrides parent::sourceFormValidate().
     *
     * In upload mode the upload directory is prepared and a newly uploaded file
     * is saved; in direct mode the scheme and readability of the entered path
     * are checked.
     *
     * @param array $values
     *   The submitted source form values, passed by reference. 'source' is
     *   trimmed; after a successful upload 'source' and 'file' are set.
     */
    public function sourceFormValidate(&$values) {
      $values['source'] = trim($values['source']);

      if (empty($this->config['direct'])) {
        $feed_dir = $this->config['directory'];

        if (!file_prepare_directory($feed_dir, FILE_CREATE_DIRECTORY | FILE_MODIFY_PERMISSIONS)) {
          // Only users who can administer feeds get a link to the settings.
          if (user_access('administer feeds')) {
            $plugin_key = feeds_importer($this->id)->config[$this->pluginType()]['plugin_key'];
            $link = url('admin/structure/feeds/' . $this->id . '/settings/' . $plugin_key);
            form_set_error('feeds][FeedsFileFetcher][source', t('Upload failed. Please check the upload <a href="@link">settings.</a>', array('@link' => $link)));
          }
          else {
            form_set_error('feeds][FeedsFileFetcher][source', t('Upload failed. Please contact your site administrator.'));
          }
          watchdog('feeds', 'The upload directory %directory required by a feed could not be created or is not accessible. A newly uploaded file could not be saved in this directory as a consequence, and the upload was canceled.', array('%directory' => $feed_dir));
        }
        // Validate and save uploaded file.
        elseif ($file = file_save_upload('feeds', array('file_validate_extensions' => array(0 => $this->config['allowed_extensions'])), $feed_dir)) {
          $values['source'] = $file->uri;
          $values['file'] = $file;
        }
        elseif (empty($values['source'])) {
          form_set_error('feeds][FeedsFileFetcher][source', t('Please upload a file.'));
        }
        // Otherwise a file is present from a previous upload and there is
        // nothing to validate.
      }
      else {
        // Check if chosen url scheme is allowed.
        $scheme = file_uri_scheme($values['source']);
        if (!$scheme || !in_array($scheme, $this->config['allowed_schemes'])) {
          form_set_error('feeds][FeedsFileFetcher][source', t("The file needs to reside within the site's files directory, its path needs to start with scheme://. Available schemes: @schemes.", array('@schemes' => implode(', ', $this->config['allowed_schemes']))));
        }
        // Check whether the given path is readable.
        elseif (!is_readable($values['source'])) {
          form_set_error('feeds][FeedsFileFetcher][source', t('The specified file or directory does not exist.'));
        }
      }
    }

    /**
     * Overrides parent::sourceSave().
     *
     * If a new file is present in the source configuration, the previously
     * stored file is deleted, the new one is made permanent and registered as
     * used by this fetcher, and its fid is stored in the configuration.
     *
     * @param FeedsSource $source
     *   The source being saved.
     */
    public function sourceSave(FeedsSource $source) {
      $source_config = $source->getConfigFor($this);

      // If a new file is present, delete the old one and replace it with the
      // new one.
      if (isset($source_config['file'])) {
        $file = $source_config['file'];
        if (isset($source_config['fid'])) {
          $this->deleteFile($source_config['fid'], $source->feed_nid);
        }
        $file->status = FILE_STATUS_PERMANENT;
        file_save($file);
        file_usage_add($file, 'feeds', get_class($this), $source->feed_nid);

        // Keep only the file id; the file object itself is not stored.
        $source_config['fid'] = $file->fid;
        unset($source_config['file']);
        $source->setConfigFor($this, $source_config);
      }
    }

    /**
     * Overrides parent::sourceDelete().
     *
     * Deletes the file referenced by the source configuration, if any.
     *
     * @param FeedsSource $source
     *   The source being deleted.
     */
    public function sourceDelete(FeedsSource $source) {
      $source_config = $source->getConfigFor($this);
      if (isset($source_config['fid'])) {
        $this->deleteFile($source_config['fid'], $source->feed_nid);
      }
    }

    /**
     * Overrides parent::configDefaults().
     *
     * @return array
     *   The default configuration. The upload directory uses the private scheme
     *   when it is available and the public scheme otherwise.
     */
    public function configDefaults() {
      $schemes = $this->getSchemes();
      $scheme = in_array('private', $schemes) ? 'private' : 'public';

      return array(
        'allowed_extensions' => 'txt csv tsv xml opml',
        'direct' => FALSE,
        'directory' => $scheme . '://feeds',
        'allowed_schemes' => $schemes,
      );
    }

    /**
     * Overrides parent::configForm().
     *
     * @param array $form_state
     *   The form state, passed by reference. Not used by this implementation.
     *
     * @return array
     *   The configuration form array.
     */
    public function configForm(&$form_state) {
      $form = array();
      $form['allowed_extensions'] = array(
        '#type' => 'textfield',
        '#title' => t('Allowed file extensions'),
        '#description' => t('Allowed file extensions for upload.'),
        '#default_value' => $this->config['allowed_extensions'],
      );
      $form['direct'] = array(
        '#type' => 'checkbox',
        '#title' => t('Supply path to file or directory directly'),
        '#description' => t('For experts. Lets users specify a path to a file <em>or a directory of files</em> directly,
        instead of a file upload through the browser. This is useful when the files that need to be imported
        are already on the server.'),
        '#default_value' => $this->config['direct'],
      );
      // The upload directory only applies when files are uploaded.
      $form['directory'] = array(
        '#type' => 'textfield',
        '#title' => t('Upload directory'),
        '#description' => t('Directory where uploaded files get stored. Prefix the path with a scheme. Available schemes: @schemes.', array('@schemes' => implode(', ', $this->getSchemes()))),
        '#default_value' => $this->config['directory'],
        '#states' => array(
          'visible' => array(':input[name="direct"]' => array('checked' => FALSE)),
          'required' => array(':input[name="direct"]' => array('checked' => FALSE)),
        ),
      );
      // The scheme selection is only shown in direct mode, and only if there is
      // at least one scheme to choose from.
      if ($options = $this->getSchemeOptions()) {
        $form['allowed_schemes'] = array(
          '#type' => 'checkboxes',
          '#title' => t('Allowed schemes'),
          '#default_value' => $this->config['allowed_schemes'],
          '#options' => $options,
          '#description' => t('Select the schemes you want to allow for direct upload.'),
          '#states' => array(
            'visible' => array(':input[name="direct"]' => array('checked' => TRUE)),
          ),
        );
      }
      return $form;
    }

    /**
     * Overrides parent::configFormValidate().
     *
     * Ensure that the chosen directory is accessible.
     *
     * @param array $values
     *   The submitted configuration values, passed by reference. 'directory' is
     *   trimmed and unchecked entries are removed from 'allowed_schemes'.
     */
    public function configFormValidate(&$values) {
      $values['directory'] = trim($values['directory']);

      // The element is only part of the form when scheme options exist, so the
      // value may be absent.
      if (isset($values['allowed_schemes']) && is_array($values['allowed_schemes'])) {
        $values['allowed_schemes'] = array_filter($values['allowed_schemes']);
      }

      if (!$values['direct']) {
        // Ensure that the upload directory field is not empty when not in
        // direct-mode.
        if (!$values['directory']) {
          form_set_error('directory', t('Please specify an upload directory.'));
          // Do not continue validating the directory if none was specified.
          return;
        }

        // Validate the URI scheme of the upload directory.
        $scheme = file_uri_scheme($values['directory']);
        if (!$scheme || !in_array($scheme, $this->getSchemes())) {
          form_set_error('directory', t('Please enter a valid scheme into the directory location.'));
          // Return here so that attempts to create the directory below don't
          // throw warnings.
          return;
        }

        // Ensure that the upload directory exists.
        if (!file_prepare_directory($values['directory'], FILE_CREATE_DIRECTORY | FILE_MODIFY_PERMISSIONS)) {
          form_set_error('directory', t('The chosen directory does not exist and attempts to create it failed.'));
        }
      }
    }

    /**
     * Deletes a file.
     *
     * Removes the usage record this fetcher holds for the file before asking
     * for the file to be deleted.
     *
     * @param int $fid
     *   The file id.
     * @param int $feed_nid
     *   The feed node's id, or 0 if a standalone feed.
     *
     * @return bool|array
     *   TRUE for success, FALSE in the event of an error, or an array if the
     *   file is being used by any modules.
     *
     * @see file_delete()
     */
    protected function deleteFile($fid, $feed_nid) {
      if ($file = file_load($fid)) {
        file_usage_delete($file, 'feeds', get_class($this), $feed_nid);
        return file_delete($file);
      }
      // No file could be loaded for this id.
      return FALSE;
    }

    /**
     * Returns available schemes.
     *
     * @return array
     *   The available schemes.
     */
    protected function getSchemes() {
      return array_keys(file_get_stream_wrappers(STREAM_WRAPPERS_WRITE_VISIBLE));
    }

    /**
     * Returns available scheme options for use in checkboxes or select list.
     *
     * @return array
     *   The available scheme array keyed scheme => description. Each
     *   description is escaped with check_plain().
     */
    protected function getSchemeOptions() {
      $options = array();
      foreach (file_get_stream_wrappers(STREAM_WRAPPERS_WRITE_VISIBLE) as $scheme => $info) {
        $options[$scheme] = check_plain($scheme . ': ' . $info['description']);
      }
      return $options;
    }

  }