taxonomy_csv.import.api.inc 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943
  1. <?php
  2. /**
  3. * @file
  4. * Validate import options and manage import process.
  5. */
  6. /**
  7. * Invoke associated include file.
  8. */
  9. $module_dir = drupal_get_path('module', 'taxonomy_csv');
  10. require_once($module_dir . '/taxonomy_csv.api.inc');
  11. require_once($module_dir . '/import/taxonomy_csv.import.line.api.inc');
  12. require_once($module_dir . '/taxonomy_csv.vocabulary.api.inc');
  13. require_once($module_dir . '/taxonomy_csv.term.api.inc');
  14. /**
  15. * Process the import of an input.
  16. *
  17. * If not used in a form, don't forget to use batch_process().
  18. *
  19. * @param $options
  20. * An associative array of options:
  21. * - file : object file if file is already uploaded
  22. * - text : csv text to import
  23. * - url : url to distant or local csv file
  24. * - path : path to local csv file
  25. * - import_format : see _taxonomy_csv_values (default: 'flat')
  26. * - fields_format : array. list of machinename fields (default: 0 => 'name')
  27. * - translate_by : string. name (default) or tid
  28. * - translate_languages: array. List of languages for i18n (default: empty)
  29. * - keep_order : boolean. keep order of imported terms or not (default)
  30. * - delimiter : 1 character csv delimiter (default: ',')
  31. * - enclosure : 0 or 1 character csv enclosure (default: none or '"')
  32. * - filter_format : string. description field format (default: 'plain_text')
  33. * - filter_format_custom: string. custom fields format (default: 'none')
  34. * - language : string. terms' default language (default: neutral 'und')
  35. * - check_line : boolean. check or not (default) format of lines
  36. * - check_utf8 : boolean. check or not (default) utf8 format
  37. * - locale_custom : string. specific locale of imported file
  38. * - vocabulary_target: 'autocreate' (default), 'duplicate' or 'existing'
  39. * - vocabulary_id : vid or machine_name of the vocabulary to import into
  40. * - i18n_mode : integer. internationalization mode of autocreated
  41. * vocabulary (default: 0 (I18N_MODE_NONE))
  42. * - vocabulary_language: string. language code of autocreated vocabulary
  43. * (default: 'und' (undefined))
  44. * - fields_custom : array. custom fields to add or create (default: array())
  45. * - delete_terms : delete all terms before import (default: FALSE)
  46. * - check_hierarchy: boolean. check (default) or not vocabulary hierarchy
  47. * - set_hierarchy : if hierarchy isn't checked, set it (0, 1 or 2 (default))
  48. * - update_or_ignore : depends on import_format: 'update' (default) or 'ignore'
  49. * - check_options : boolean. check or not (default) this array of options
  50. * - result_display : boolean. display or not (default)
  51. * - result_stats : boolean. display or not (default) stats
  52. * - result_terms : boolean. display or not (default) list of imported terms
  53. * - result_level : log: 'first' (default), 'warnings', 'notices' or 'infos'
  54. * - result_type : display log 'by_message' (default) or 'by_line'
  55. * Only one option is required: file or text or url or path. Other options
  56. * have default values. Warning: default values are little different with UI.
  57. *
  58. * @return
  59. * Array of errors or nothing (need to execute batch process; result is logged
  60. * in watchdog).
  61. */
  62. function taxonomy_csv_import($options) {
  63. // Complete $options with default values if needed.
  64. // Default api and UI options are different.
  65. $options += _taxonomy_csv_values('import_default_api');
  66. // Preload text or file in order to check access to temporary folder.
  67. $messages = _taxonomy_csv_import_input_preload($options);
  68. if (count($messages)) {
  69. return $messages;
  70. }
  71. // Process import.
  72. return taxonomy_csv_vocabulary_import($options);
  73. }
  74. /**
  75. * Preload input.
  76. *
  77. * Automatically determinate source choice (file, text, path or url).
  78. * Then check if there is write access and prepare file.
  79. * To use a file is not optimal with array and text input, but this solution
  80. * unifies input and avoids some memory crashes.
  81. *
  82. * @todo Add an option to use only memory.
  83. *
  84. * @param $options
  85. * Array of options.
  86. *
  87. * @return
  88. * Array of messages errors if any.
  89. * By reference options are cleaned and completed.
  90. */
  91. function _taxonomy_csv_import_input_preload(&$options) {
  92. $messages = array();
  93. // File import. Used with UI.
  94. if (isset($options['file']) && is_object($options['file'])) {
  95. // File ready. No more check here.
  96. }
  97. // Text import.
  98. elseif (isset($options['text']) && $options['text'] != '') {
  99. // Prepare import by text: save text as a temp file to simplify process.
  100. $filename = file_unmanaged_save_data(
  101. $options['text'],
  102. 'public://taxocsv.csv',
  103. 'FILE_EXISTS_RENAME');
  104. $options['file'] = (object) array(
  105. 'filename' => basename($filename),
  106. 'filepath' => drupal_realpath($filename),
  107. 'filesize' => filesize($filename)
  108. );
  109. }
  110. // Url import.
  111. elseif (isset($options['url']) && $options['url'] != '') {
  112. $filename = file_unmanaged_save_data(
  113. file_get_contents($options['url']),
  114. 'public://taxocsv.csv',
  115. 'FILE_EXISTS_RENAME');
  116. $options['file'] = (object) array(
  117. 'filename' => basename($filename),
  118. 'filepath' => drupal_realpath($filename),
  119. 'filesize' => filesize($filename),
  120. );
  121. }
  122. // Path import. With UI, path is already uploaded as a file.
  123. elseif (isset($options['path']) && $options['path'] != '') {
  124. // Load source local file. No check for extension with API.
  125. $file = file_save_upload($options['path'], array('file_validate_extensions' => array('csv txt')));
  126. // fopen and fseek need a real path.
  127. if (!empty($file)) {
  128. $file->filepath = drupal_realpath($file->uri);
  129. if (!empty($file->filepath)) {
  130. $options['file'] = $file;
  131. }
  132. }
  133. }
  134. else {
  135. $messages['source_choice'] = t('Source choice should be a text, an url or a file path and source content should not be empty.');
  136. return $messages;
  137. }
  138. // Check file.
  139. if (!is_object($options['file'])
  140. || !$options['file']->filesize) {
  141. $messages['file'] = t('Import failed.') . '<br />'
  142. . t('- Check size of your input : it cannot be null.') . '<br />'
  143. . t('- Check your server configuration and your rights : server needs permission to access and to read file.') . '<br />'
  144. . t('- Check access rights to temp directory : import needs permission to write and to read in it.');
  145. return $messages;
  146. }
  147. // Check if input format is utf8, if file has no bom and convert it if needed.
  148. if (!_taxonomy_csv_import_clean_utf8($options['file'], $options['check_utf8'])) {
  149. $messages['file'] = t("Your file is not utf-8 formatted. Possible solutions below.") . '<br />'
  150. . t('- Convert your file to utf-8.') . '<br />'
  151. . t('- Install iconv, GNU recode or mbstring for PHP.') . '<br />'
  152. . t('- Disable "Check utf-8" option.');
  153. return $messages;
  154. }
  155. return $messages;
  156. }
  157. /**
  158. * Helper function to check if file is utf8 encoded and to remove bom if needed.
  159. *
  160. * See http://drupal.org/node/364832.
  161. * This function remove utf8 byte order mark if needed.
  162. *
  163. * @param $file
  164. * By reference file object to check.
  165. *
  166. * @param $check_utf8
  167. * Boolean. Check utf8 format of the file (default) or not.
  168. *
  169. * @return
  170. * TRUE if input is utf8 formatted or FALSE else. File is updated if needed.
  171. */
  172. function _taxonomy_csv_import_clean_utf8(&$file, $check_utf8 = TRUE) {
  173. $content = file_get_contents($file->filepath, TRUE);
  174. $flag = FALSE;
  175. // Check encoding.
  176. if ($check_utf8) {
  177. if (!function_exists('mb_detect_encoding')) {
  178. return FALSE;
  179. }
  180. $enc = mb_detect_encoding($content, 'UTF-8, ISO-8859-1, ISO-8859-15', TRUE);
  181. if ($enc != 'UTF-8') {
  182. $content = drupal_convert_to_utf8($content, $enc);
  183. if (!$content) {
  184. $messages[] = 320; // Error convert.
  185. return FALSE;
  186. }
  187. $flag = TRUE;
  188. }
  189. }
  190. // Skip eventual UTF-8 byte order mark.
  191. if (strncmp($content, "\xEF\xBB\xBF", 3) === 0) {
  192. $content = substr($content, 3);
  193. $flag = TRUE;
  194. }
  195. // Update content in file if needed.
  196. if ($flag) {
  197. $filename = file_unmanaged_save_data(
  198. $content,
  199. $file->uri,
  200. 'FILE_EXISTS_REPLACE');
  201. $file = (object) array(
  202. 'filename' => basename($filename),
  203. 'filepath' => drupal_realpath($filename),
  204. 'filesize' => filesize($filename),
  205. );
  206. }
  207. return TRUE;
  208. }
  209. /**
  210. * Helper to check if delimiter is a soft tab and to prepare file if needed.
  211. *
  212. * @param $file
  213. * By reference file object to check.
  214. *
  215. * @param $delimiter
  216. * String. Soft tab delimiter to switch with a true tab.
  217. *
  218. * @return
  219. * TRUE if delimiter isn't a soft tab or if file has been updated.
  220. * FALSE if file can't be updated with a true tab (currently not used).
  221. */
  222. function _taxonomy_csv_import_soft_tab(&$file, $delimiter) {
  223. if (drupal_strlen($delimiter) > 1) {
  224. $content = file_get_contents($file->filepath, TRUE);
  225. // Switch soft tab delimiter with a true tab.
  226. $content = str_replace($delimiter, "\t", $content);
  227. // Save updated file.
  228. $filename = file_unmanaged_save_data(
  229. $content,
  230. $file->uri,
  231. 'FILE_EXISTS_REPLACE');
  232. $file = (object) array(
  233. 'filename' => basename($filename),
  234. 'filepath' => drupal_realpath($filename),
  235. 'filesize' => filesize($filename),
  236. );
  237. }
  238. return TRUE;
  239. }
  240. /**
  241. * Prepare the batch to import the vocabulary.
  242. *
  243. * @note
  244. * If not used in a form, don't forget to use batch_process().
  245. *
  246. * @param $options
  247. * Array. Same as taxonomy_csv_import. See above.
  248. *
  249. * @return
  250. * Array of errors or nothing (batch process to execute).
  251. */
  252. function taxonomy_csv_vocabulary_import($options) {
  253. // Check options and return array of messages in case of errors.
  254. if ($options['check_options']) {
  255. $module_dir = drupal_get_path('module', 'taxonomy_csv');
  256. require_once("$module_dir/import/taxonomy_csv.import.admin.inc");
  257. $result = _taxonomy_csv_import_check_options($options);
  258. if (count($result)) {
  259. return $result;
  260. }
  261. }
  262. // Complete $options.
  263. // Switch soft tab delimiter with a true one if needed.
  264. if (drupal_strlen($options['delimiter']) > 1) {
  265. $result = _taxonomy_csv_import_soft_tab($options['file'], $options['delimiter']);
  266. $options['delimiter'] = "\t";
  267. }
  268. // Calculates number of lines to be imported. File is already checked.
  269. $options['total_lines'] = count(file($options['file']->filepath));
  270. // Prepare vocabularies. Options are passed by-reference and can be updated.
  271. $options['vocabulary'] = _taxonomy_csv_import_vocabulary_prepare($options);
  272. // Get infos about fields of vocabulary.
  273. if ($options['import_format'] == TAXONOMY_CSV_FORMAT_FIELDS) {
  274. $options['instances'] = field_info_instances('taxonomy_term', $options['vocabulary']->machine_name);
  275. $options['fields'] = array();
  276. if (is_array($options['instances'])) {
  277. foreach ($options['instances'] as $key => $value) {
  278. $options['fields'][$key] = field_info_field($key);
  279. }
  280. }
  281. }
  282. // Set locale if needed.
  283. // See http://drupal.org/node/872366
  284. $options['locale_previous'] = setlocale(LC_CTYPE, 0);
  285. if ($options['locale_custom']) {
  286. setlocale(LC_CTYPE, $options['locale_custom']);
  287. }
  288. // Prepare import batch.
  289. // Use a one step batch in order to avoid memory crash in case of big import.
  290. $batch = array(
  291. 'title' => ($options['source_choice'] == 'text') ?
  292. t('Importing !total_lines lines from text...', array(
  293. '!total_lines' => $options['total_lines'])) :
  294. t('Importing !total_lines lines from CSV file "%filename"...', array(
  295. '%filename' => $options['vocabulary']->name,
  296. '!total_lines' => $options['total_lines'])),
  297. 'init_message' => t('Starting uploading of datas...') . '<br />'
  298. . t('Wait some seconds for pre-processing...'),
  299. 'progress_message' => '',
  300. 'error_message' => t('An error occurred during the import.'),
  301. 'finished' => '_taxonomy_csv_vocabulary_import_finished',
  302. 'file' => drupal_get_path('module', 'taxonomy_csv') . '/import/taxonomy_csv.import.api.inc',
  303. 'progressive' => TRUE,
  304. 'operations' => array(
  305. 0 => array('_taxonomy_csv_vocabulary_import_process', array($options)),
  306. ),
  307. );
  308. batch_set($batch);
  309. }
  310. /**
  311. * Batch process of vocabulary import.
  312. *
  313. * @param $options
  314. * Array of batch options.
  315. * @param &$context
  316. * Batch context to keep results and messages.
  317. *
  318. * @return
  319. * NULL because use of &$context.
  320. */
  321. function _taxonomy_csv_vocabulary_import_process($options, &$context) {
  322. // Session or batch context are needed, because with batch process, static
  323. // and $GLOBALS don't work for large import.
  324. // First callback to prepare batch context.
  325. if (empty($context['sandbox'])) {
  326. // Remember options as batch_set can't use form_storage.
  327. // It allows too that first line in result is numbered 1 and not 0.
  328. $context['results'][0] = $options;
  329. // Automatically detect line endings.
  330. ini_set('auto_detect_line_endings', '1');
  331. // Initialize some variables.
  332. $context['results'][0]['current_line'] = 0;
  333. $context['results'][0]['worst_line'] = 0;
  334. $context['results'][0]['worst_message'] = 799;
  335. $context['results'][0]['terms_count'] = 0;
  336. $context['results'][0]['handle'] = fopen($options['file']->filepath, 'r');
  337. $context['sandbox']['handle_pointer'] = 0;
  338. $context['sandbox']['previous_items'] = array(
  339. 'name' => array(),
  340. 'tid' => array(),
  341. );
  342. }
  343. elseif (!is_resource($context['results'][0]['handle'])) {
  344. // Recall file and set pointer in case of memory or time out.
  345. $context['results'][0]['handle'] = fopen($options['file']->filepath, 'r');
  346. fseek($context['results'][0]['handle'], $context['sandbox']['handle_pointer']);
  347. }
  348. // Load and process one line.
  349. $line_number = &$context['results'][0]['current_line'];
  350. $worst_line = &$context['results'][0]['worst_line'];
  351. $worst_message = &$context['results'][0]['worst_message'];
  352. $terms_count = &$context['results'][0]['terms_count'];
  353. $handle = &$context['results'][0]['handle'];
  354. $previous_items = &$context['sandbox']['previous_items'];
  355. // To set locale is needed with fgetcsv and it's needed each time this
  356. // function is called. See http:/php.net/manual/en/function.fgetcsv.php.
  357. // See http://drupal.org/node/872366
  358. // @todo Use of preg_split?
  359. if ($options['locale_custom']) {
  360. setlocale(LC_CTYPE, $options['locale_custom']);
  361. }
  362. $line = fgetcsv($handle, 4096, $options['delimiter'], $options['enclosure']);
  363. if ($line) {
  364. ++$line_number;
  365. // Remember pointer in case of memory or time out.
  366. $context['sandbox']['handle_pointer'] = ftell($handle);
  367. // Process import of current line.
  368. $result = taxonomy_csv_line_import_process($line, $options, $previous_items, $terms_count);
  369. // Remember processed line.
  370. $previous_items['name'] = $result['name'];
  371. $previous_items['tid'] = $result['tid'];
  372. $terms_count = $result['terms_count'];
  373. // Remember first worst message of imported lines.
  374. $worst_message_new = _taxonomy_csv_worst_message($result['msg']);
  375. if ($worst_message > $worst_message_new) {
  376. $worst_message = $worst_message_new;
  377. $worst_line = $line_number;
  378. };
  379. // Remember only wanted messages.
  380. if (($options['result_stats'] || $options['result_terms'])
  381. && $result['tid']
  382. ) {
  383. // Some formats use $result to save other infos, so it needs to be cleaned
  384. // before to be saved.
  385. if (in_array($format['import_format'], array(
  386. TAXONOMY_CSV_FORMAT_STRUCTURE,
  387. TAXONOMY_CSV_FORMAT_TREE,
  388. TAXONOMY_CSV_FORMAT_POLYHIERARCHY,
  389. ))) {
  390. // Don't remember previous previous items.
  391. $context['results'][0]['imported_terms'][0] = array_values($result['tid']);
  392. }
  393. else {
  394. $context['results'][0]['imported_terms'][$line_number] = $result['tid'];
  395. }
  396. }
  397. if ($options['result_level'] != 'first') {
  398. $list_messages = array();
  399. switch ($options['result_level']) {
  400. // case 'first':
  401. // break;
  402. case 'warnings':
  403. foreach ($result['msg'] as $msg) {
  404. if ($msg < TAXONOMY_CSV_PROCESS_NOTICE) {
  405. $list_messages[] = $msg;
  406. }
  407. }
  408. break;
  409. case 'notices':
  410. foreach ($result['msg'] as $msg) {
  411. if ($msg < TAXONOMY_CSV_PROCESS_INFO) {
  412. $list_messages[] = $msg;
  413. }
  414. }
  415. break;
  416. case 'infos':
  417. $list_messages = $result['msg'];
  418. break;
  419. }
  420. if (count($list_messages)) {
  421. $context['results'][$line_number] = $list_messages;
  422. }
  423. }
  424. // Inform about progress.
  425. $context['message'] = t('Line !line_number of !total_lines processed: !line', array(
  426. '!line_number' => $line_number,
  427. '!total_lines' => $options['total_lines'],
  428. '!line' => '"' . implode('", "', $line) . '"',
  429. ));
  430. // Check worst message of imported lines and update progress.
  431. if ($worst_message >= TAXONOMY_CSV_PROCESS_WARNING) {
  432. // Count should be <= 0.99 to avoid batch stop before end (Drupal 7 bug).
  433. $context['finished'] = floor($line_number / $options['total_lines'] * 100) / 100;
  434. }
  435. else {
  436. $context['finished'] = 1;
  437. }
  438. }
  439. }
  440. /**
  441. * Callback for finished batch import and display result informations.
  442. */
  443. function _taxonomy_csv_vocabulary_import_finished($success, $results, $operations) {
  444. // $results[0] is used to save options and some infos (imported terms), as
  445. // batch process can't use $form_state.
  446. $options = &$results[0];
  447. unset($results[0]);
  448. // Close imported file.
  449. if ($options['handle']) {
  450. fclose($options['handle']);
  451. }
  452. // Set previous locale if needed.
  453. if ($options['locale_custom']) {
  454. setlocale(LC_CTYPE, $options['locale_previous']);
  455. }
  456. // Clean Session.
  457. unset($_SESSION['taxonomy_csv_import']);
  458. // Invoke import stats file if user wants to display texts for result.
  459. if ($options['result_display']) {
  460. $module_dir = drupal_get_path('module', 'taxonomy_csv');
  461. require_once("$module_dir/import/taxonomy_csv.import.result.inc");
  462. }
  463. // Short summary information is different if batch succeeded or not.
  464. if ($success) {
  465. $variables = array(
  466. '!line' => $options['worst_line'],
  467. '!total_lines' => $options['total_lines'],
  468. '!worst_msg' => $options['result_display'] ?
  469. _taxonomy_csv_message_text($options['worst_message']) :
  470. t('Message code') . ' = ' . $options['worst_message'],
  471. );
  472. $messages = array(
  473. WATCHDOG_DEBUG => t('No error, warnings or notices have been reported during import process of !total_lines lines.', $variables),
  474. WATCHDOG_INFO => t('No error, warnings or notices have been reported during import process of !total_lines lines.', $variables),
  475. WATCHDOG_NOTICE => t('Notices have been reported during import process (bad formatted or empty lines). !total_lines lines processed. First notice occurred on line !line [!worst_msg].', $variables),
  476. WATCHDOG_WARNING => t('Warnings have been reported during import process (bad formatted lines). !total_lines lines processed. First line skipped is line !line [!worst_msg].', $variables),
  477. WATCHDOG_ERROR => t('Errors have been reported during import process. Process failed at line !line of a total of !total_lines [!worst_msg].', $variables),
  478. );
  479. $worst_level = intval($options['worst_message'] / 100);
  480. $message = $messages[$worst_level];
  481. }
  482. else {
  483. $message = t('Importation failed. Import process was successful until the line !line_count of a total of !total_lines. You can first check your file on this line and check file uploading.', array(
  484. '!line_count' => $options['current_line'],
  485. '!total_lines' => $options['total_lines'],
  486. )) . '<br />'
  487. . t('This issue is related to import process or to size import and probably not to content. You can disable hierarchy check and reduce log level. You can divide your import file into lighter files. You can increase php and sql memory. If problem does not disappear, you can reinstall module from a fresh release or submit an issue on <a href="!link">Taxonomy CSV import/export module</a>.', array(
  488. '!link' => url('http://drupal.org/project/issues/taxonomy_csv/'),
  489. ));
  490. $worst_level = WATCHDOG_ERROR;
  491. }
  492. // Set result message in watchdog and eventually in user interface.
  493. // Use of a $message variable is unrecommended, but simpler and working.
  494. // See http://drupal.org/node/323101
  495. watchdog('taxonomy_csv', $message, NULL, $worst_level);
  496. if ($options['result_display']) {
  497. _taxonomy_csv_import_result($options, $worst_level, $message, $results);
  498. }
  499. }
  500. /**
  501. * Prepare a vocabulary for import.
  502. *
  503. * @param $options
  504. * Array of batch options.
  505. * @param $check
  506. * (Optional) Boolean used to determine if some options are checked or not.
  507. *
  508. * @return
  509. * Prepared vocabulary object. $options can be updated.
  510. */
  511. function _taxonomy_csv_import_vocabulary_prepare(&$options, $check = TRUE) {
  512. $name = '';
  513. if ($check) {
  514. // Use name of file or url as vocabulary name.
  515. if (isset($options['url']) && $options['url'] != '') {
  516. $name = basename($options['url']);
  517. }
  518. elseif (isset($options['text']) && $options['text'] != '') {
  519. $name = '';
  520. // Remove useless option, because text is now saved.
  521. $options['text'] = '';
  522. }
  523. elseif (isset($options['file']->filename) && $options['file']->filename != '') {
  524. $name = $options['file']->filename;
  525. }
  526. }
  527. // Create, duplicate or use an existing vocabulary.
  528. switch ($options['vocabulary_target']) {
  529. case 'autocreate':
  530. $vocabulary = taxonomy_csv_vocabulary_create($name);
  531. $options['vocabulary_id'] = $vocabulary->vid;
  532. // Update vocabulary with language options.
  533. if (module_exists('i18n_taxonomy')) {
  534. $vocabulary->i18n_mode = $options['i18n_mode'];
  535. $vocabulary->language = $options['vocabulary_language'];
  536. $result = taxonomy_vocabulary_save($vocabulary);
  537. }
  538. break;
  539. case 'existing':
  540. $vocabulary = taxonomy_vocabulary_load($options['vocabulary_id']);
  541. // Optional deletion of terms.
  542. if ($options['delete_terms']) {
  543. $tids = taxonomy_csv_vocabulary_get_tids($vocabulary->vid);
  544. $result = taxonomy_csv_term_delete_multiple($tids);
  545. }
  546. break;
  547. }
  548. // Synchronize vocabulary internationalization options.
  549. if (module_exists('i18n_taxonomy')) {
  550. $options['i18n_mode'] = $vocabulary->i18n_mode;
  551. $options['vocabulary_language'] = $vocabulary->language;
  552. if ($vocabulary->i18n_mode == I18N_MODE_LOCALIZE) {
  553. $options['filter_format'] = 'plain_text';
  554. }
  555. }
  556. // Add or create custom fields if needed.
  557. if ($options['import_format'] == TAXONOMY_CSV_FORMAT_FIELDS) {
  558. $fields_custom = array_flip($options['fields_format']);
  559. // Set default format to items of the custom format.
  560. foreach ($fields_custom as $key => $value) {
  561. if (in_array($key, array(
  562. 'tid',
  563. 'vid',
  564. 'name',
  565. 'description',
  566. 'format',
  567. 'weight',
  568. 'language',
  569. 'i18n_tsid',
  570. 'guid',
  571. 'vocabulary_machine_name',
  572. 'parent',
  573. ))) {
  574. unset($fields_custom[$key]);
  575. }
  576. else {
  577. $fields_custom[$key] = 'text';
  578. }
  579. }
  580. // Add items of the custom fields. Allowed types are already checked.
  581. foreach ($options['fields_custom'] as $key => $value) {
  582. $field_name = (strpos($value, '|') === FALSE) ?
  583. $value :
  584. trim(drupal_substr($value, 0, strpos($value, '|')));
  585. $field_type = (strpos($value, '|') === FALSE) ?
  586. 'text' :
  587. trim(drupal_substr($value, strpos($value, '|') + 1));
  588. $fields_custom[$field_name] = $field_type;
  589. }
  590. // Add or create each custom fields.
  591. foreach ($fields_custom as $field_name => $field_type) {
  592. // Set default field type.
  593. $field = array(
  594. 'field_name' => $field_name,
  595. 'label' => $field_name,
  596. 'description' => '',
  597. 'type' => $field_type,
  598. 'cardinality' => FIELD_CARDINALITY_UNLIMITED,
  599. // Currently, translatable term reference fields are not supported.
  600. 'translatable' => FALSE,
  601. );
  602. switch ($field_type) {
  603. case 'taxonomy_term_reference':
  604. $field += array(
  605. 'settings' => array('allowed_values' => array(0 => array(
  606. 'vocabulary' => $vocabulary->machine_name,
  607. 'parent' => 0,
  608. ))));
  609. break;
  610. case 'list_boolean':
  611. $field += array(
  612. 'settings' => array('allowed_values' => array(
  613. '0' => 'FALSE',
  614. '1' => 'TRUE',
  615. )));
  616. break;
  617. }
  618. $result = taxonomy_csv_vocabulary_field_attach($vocabulary->machine_name, $field);
  619. }
  620. }
  621. return $vocabulary;
  622. }
  623. /**
  624. * Import a line that contains a term and other items matching the options.
  625. *
  626. * @param $line
  627. * Array which contains items of a csv line.
  628. * @param $options
  629. * An associative array of import options:
  630. * - import_format : format of the csv line (see taxonomy.api.inc)
  631. * - keep_order : boolean. keep order of imported terms or not (default)
  632. * - vocabulary_id : vocabulary id to import into
  633. * - update_or_ignore: indicates what will become existing terms, if any.
  634. * - check_line : boolean. Tweak to check (default) or not format of lines
  635. * - check_utf8 : boolean. Tweak to check (default) or not utf8 format
  636. * @param $previous_items
  637. * (Optional) Cleaned and checked previous imported line names and tids array.
  638. * Needed with some contents as one term array structure.
  639. * @param $terms_count
  640. * (Optional integer) Total of imported terms (duplicate included) is needed
  641. * to set weight of terms and to keep order of items, if wished.
  642. *
  643. * @return
  644. * Result array:
  645. * - 'name' => array of imported terms names,
  646. * - 'tid' => array of imported terms tids,
  647. * - 'msg' => messages arrays:
  648. * term position => array of status messages of term,
  649. * 'line' => array of status messages of line,
  650. * - 'terms_count' => total of imported terms.
  651. */
  function taxonomy_csv_line_import_process($line, $options, $previous_items = array(), $terms_count = 0) {
    // Message codes attached to the whole line and to its items.
    $line_messages = array();
    $items_messages = array();

    // Default result, returned as is (plus messages) when nothing is imported.
    $result = array(
      'name' => array(),
      'tid' => array(),
      'msg' => array(),
      'terms_count' => $terms_count,
    );

    // Becomes TRUE once the line can be handed over to the importer.
    $import_line = FALSE;

    if ($options['check_line']) {
      // 1. Validate and clean line.
      $line = _taxonomy_csv_line_import_clean($line, $line_messages);

      // 2. Check items of line if no error occurs and if line is not empty
      // (696 is the "empty line" info code set by the cleaning step).
      $line_is_usable = (_taxonomy_csv_worst_message($line_messages) >= TAXONOMY_CSV_PROCESS_NOTICE)
        && ((count($line_messages) == 0) || ($line_messages[0] != 696));
      if ($line_is_usable) {
        $line = _taxonomy_csv_line_import_check($line, $options, $previous_items, $items_messages);
        $line_messages = array_merge($line_messages, $items_messages);

        // 3. Process import only when the worst item message is at least
        // TAXONOMY_CSV_PROCESS_NOTICE.
        $import_line = (_taxonomy_csv_worst_message($items_messages) >= TAXONOMY_CSV_PROCESS_NOTICE);
      }
    }
    else {
      // No checks, so directly import line after a simple trim.
      $line = array_values(array_map('trim', $line));
      $import_line = TRUE;
    }

    if ($import_line) {
      $result = taxonomy_csv_line_import($line, $options, $previous_items, $terms_count);
      // Add line level message of bad or successful import.
      $line_messages[] = (_taxonomy_csv_worst_message($result['msg']) >= TAXONOMY_CSV_PROCESS_NOTICE) ? 699 : 499;
    }

    // Keep previous items in case of an empty or an unprocessed line.
    // $previous_items is optional and defaults to an empty array, so its keys
    // cannot be read unconditionally: fall back to empty arrays to keep the
    // documented shape of the result.
    if (count($result['name']) == 0) {
      $result['name'] = isset($previous_items['name']) ? $previous_items['name'] : array();
      $result['tid'] = isset($previous_items['tid']) ? $previous_items['tid'] : array();
    }

    // Add line level messages and clean result.
    $result['msg'] = array_unique(array_merge($result['msg'], $line_messages));
    sort($result['msg']);

    return $result;
  }
  /**
   * Helper function to clean an imported line.
   *
   * Items are trimmed and the returned array is re-indexed from 0. A message
   * code is appended to $messages when the line is not an array (310), is
   * empty (696), is not valid UTF-8 (321) or contains only empty items (491).
   *
   * @todo To be simplified.
   *
   * @param $line
   *   Array of items to be processed.
   * @param &$messages
   *   (Optional) By reference array of messages codes to be returned.
   *
   * @return
   *   Array of cleaned imported line. An empty array is returned when the line
   *   is not an array or is empty; the untouched line is returned when all of
   *   its items are empty.
   */
  function _taxonomy_csv_line_import_clean($line, &$messages = array()) {
    // Example: string "Term 1".
    if (!is_array($line)) {
      $messages[] = 310; // Error not a line array.
      return array();
    }

    // Example: " " or unrecognized line.
    // The single item is fetched with reset() and not with $line[0], because
    // keys of the line are not guaranteed to start at 0: a lone item stored
    // under another key must not be mistaken for an empty line.
    if ((count($line) == 0)
      || ((count($line) == 1) && (trim(reset($line)) == ''))) {
      $messages[] = 696; // Info empty line.
      return array();
    }

    // An invalid encoding is reported, but the line is still cleaned.
    if (!drupal_validate_utf8(implode(',', $line))) {
      $messages[] = 321; // Error validate.
    }

    // Trim and check empty line: useful for some non-Ascii lines.
    $trimmed_line = array_map('trim', $line);

    // Example: " , , ".
    // Only one distinct value remains and it is the empty string: every item
    // of the line is empty.
    $distinct_items = array_unique($trimmed_line);
    if ((count($distinct_items) == 1) && in_array('', $distinct_items, TRUE)) {
      $messages[] = 491; // Warning no item.
      return $line;
    }

    return array_values($trimmed_line);
  }
  /**
   * Check a line to find duplicate items, empty items...
   *
   * The checks depend on $options['import_format']. Message codes describing
   * what was found are appended to $messages; when a blocking problem is met,
   * an empty array is returned.
   *
   * @param $line
   *   Array of items from a cleaned line.
   * @param $options
   *   Array of available options. See taxonomy_csv_line_import_process.
   *   Only 'import_format' and, for the fields format, 'fields_format' are read
   *   here.
   * @param $previous_items
   *   (Optional) Cleaned and checked previous imported line names and tids array.
   *   Needed with some contents as one term array structure.
   * @param &$messages
   *   (Optional) By reference array of messages codes to be returned.
   *
   * @return
   *   Array of checked items of imported line, re-indexed from 0.
   */
  function _taxonomy_csv_line_import_check($line, $options, $previous_items = array(), &$messages = array()) {
    $checked_items = array();

    // No input check because line and previous line are already checked.
    // @todo A php callback function may be used to simplify checking.
    // @todo To be factorized and simplified.
    switch ($options['import_format']) {
      case TAXONOMY_CSV_FORMAT_FLAT:
        if (count($line) == 0) {
          $messages[] = 491; // Warning no item.
          break;
        }

        // Empty items and duplicates are both dropped, so either of them
        // triggers the notice below.
        $checked_items = array_unique(array_filter($line));
        if (count($checked_items) < count($line)) {
          $messages[] = 531; // Notice duplicates, which are removed.
        }

        foreach ($checked_items as $name) {
          if (drupal_strlen($name) > 255) {
            $messages[] = 454; // Warning too long.
            break;
          }
        }
        break;

      case TAXONOMY_CSV_FORMAT_STRUCTURE:
      case TAXONOMY_CSV_FORMAT_TREE:
      case TAXONOMY_CSV_FORMAT_POLYHIERARCHY:
        // Check last empty column before first item with previous imported items.
        $line_count = count($line);
        $first_non_empty = 0;
        while (($first_non_empty < $line_count) && empty($line[$first_non_empty])) {
          $first_non_empty++;
        }

        // Example: Previous line("Term 1,Item 2") ; Current line(",,,Item4")
        // Leading empty columns stand for items of the previous line, so the
        // previous line must have an item just before the first new one.
        if ($first_non_empty && (!isset($previous_items['name'][$first_non_empty - 1]))) {
          $messages[] = 410; // Warning impossible to get parent.
          break;
        }

        // "0" value are lost, but that is not important for a taxonomy.
        $remaining_count = $line_count - $first_non_empty;
        $imported_items = array_filter(array_slice($line, $first_non_empty));
        $imported_count = count($imported_items);
        if ($imported_count == 0) {
          $messages[] = 491; // Warning no item.
          break;
        }
        if ($imported_count < $remaining_count) {
          $messages[] = 510; // Notice empty items.
        }
        if (count(array_unique($imported_items)) < $imported_count) {
          $messages[] = 632; // Info duplicates (not removed).
        }

        // Leading empty columns are kept as empty strings so that items stay
        // at their column position.
        if ($first_non_empty == 0) {
          $checked_items = $imported_items;
        }
        else {
          $checked_items = array_merge(array_fill(0, $first_non_empty, ''), $imported_items);
        }

        foreach ($checked_items as $name) {
          if (drupal_strlen($name) > 255) {
            $messages[] = 454; // Warning too long.
            break;
          }
        }
        break;

      case TAXONOMY_CSV_FORMAT_FIELDS:
        // Get number of items.
        $number_items = count($options['fields_format']);
        if (count($line) < $number_items) {
          $messages[] = 570; // Notice too little items.
        }
        elseif (count($line) > $number_items) {
          $messages[] = 564; // Notice too many items.
        }

        // NOTE(review): this looks for empty entries in the fields format, not
        // in the line itself - confirm this is the intended check.
        if (count(array_keys($options['fields_format'], '')) >= 1) {
          $messages[] = 513; // Notice empty items.
        }

        // @todo Add format check: required or not, type of datas...
        // Currently, check only if the first field is empty or not.
        if (empty($line[0])) {
          $messages[] = 464; // Warning no name.
          break;
        }
        $checked_items = $line;
        break;

      case TAXONOMY_CSV_FORMAT_TRANSLATE:
        // Two columns are needed and the first one cannot be empty.
        if (empty($line[0]) || (count($line) < 2)) {
          $messages[] = 484; // Warning no first/second column.
          break;
        }
        $checked_items = $line;
        break;

      default:
        $messages[] = 306; // Error unknown source content.
    }

    return array_values($checked_items);
  }