remove_duplicates.module 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897
  1. <?php
  2. /**
  3. * @file
  4. * Remove duplicate nodes according to node fields or Custom fields.
  5. *
  6. * Author : Sami Radi - VirtuoWorks.
  7. */
  /**
   * Implements hook_permission().
   *
   * Declares the single permission that guards the module's administration
   * form (it is the one referenced by remove_duplicates_menu()).
   *
   * @return array
   *   Permission definitions keyed by permission machine name.
   */
  function remove_duplicates_permission() {
    $permissions = array();
    $permissions['administer remove_duplicates'] = array(
      'title' => t('Use Remove Duplicates'),
      'restrict access' => TRUE,
    );
    return $permissions;
  }
  /**
   * Implements hook_menu().
   *
   * Registers the module's single administration page, which renders
   * remove_duplicates_settings_form() through drupal_get_form().
   *
   * @return array
   *   Menu item definitions keyed by path.
   */
  function remove_duplicates_menu() {
    // Titles and Descriptions should no longer be wrapped in t().
    // See : https://drupal.org/node/140311
    $settings_page = array(
      'title' => 'Remove Duplicates',
      'description' => 'Delete Duplicate Nodes',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('remove_duplicates_settings_form'),
      'access arguments' => array('administer remove_duplicates'),
      'type' => MENU_NORMAL_ITEM,
    );
    return array('admin/config/content/remove_duplicates' => $settings_page);
  }
  /**
   * Form constructor for the module.
   *
   * Two-step form: the settings page (step 1/2) is built until the confirm
   * flag is raised in $form_state['storage'], after which the confirmation
   * page (step 2/2) is built instead. A backup warning is shown on both steps.
   *
   * @see remove_duplicates_settings_form_submit()
   *
   * @ingroup forms
   */
  function remove_duplicates_settings_form($form, &$form_state) {
    // FALSE as third argument: do not repeat the warning if already queued.
    drupal_set_message(t('Be careful, you might be losing data! I recommend doing a backup before removing duplicates.'), 'warning', FALSE);

    $is_confirm_step = !empty($form_state['storage']['confirm']);
    if ($is_confirm_step) {
      // Step 2/2 : confirmation page.
      return remove_duplicates_build_confirm_settings_form($form, $form_state);
    }
    // Step 1/2 : settings page.
    return remove_duplicates_build_settings_form($form, $form_state);
  }
  /**
   * Form submission handler for remove_duplicates_settings_form().
   *
   * Dispatches to the step 1/2 or step 2/2 processing function depending on
   * whether the confirm flag is set in $form_state['storage'].
   */
  function remove_duplicates_settings_form_submit($form, &$form_state) {
    if (empty($form_state['storage']['confirm'])) {
      // Submission of the settings form (step 1/2).
      remove_duplicates_settings_form_submit_process($form, $form_state);
      return;
    }
    // Submission of the confirmation form (step 2/2).
    remove_duplicates_confirm_settings_form_submit_process($form, $form_state);
  }
  /**
   * Form constructor for the module settings page (Step 1/2).
   *
   * Builds, in order: an informational message, the node type selector, one
   * field selector per node type (each only visible while its node type is
   * selected), the results layout choice, the case sensitivity and "keep one
   * published node" checkboxes, a post-size warning for the tableselect
   * layout, and the submit button.
   *
   * @param array $form
   *   The form array to extend.
   * @param array $form_state
   *   The form state (not modified here).
   *
   * @return array
   *   The settings form.
   *
   * @see remove_duplicates_settings_form_submit()
   *
   * @ingroup forms
   */
  function remove_duplicates_build_settings_form($form, &$form_state) {
    $message = t('All actions are logged in <b>Reports</b> >> <b>Recent log messages</b>.');
    // Defined once: a second identical assignment further down used to
    // overwrite this element with the very same content.
    $form['message'] = array(
      '#type' => 'item',
      '#markup' => '<div class="description"><p>' . $message . '</p></div>',
    );

    $node_types = node_type_get_names();
    $node_types_fields = _remove_duplicates_get_node_types_fields();
    $form['remove_duplicates_node_types'] = array(
      '#type' => 'select',
      '#title' => t('Select a node type.'),
      '#options' => $node_types,
      '#default_value' => variable_get('remove_duplicates_node_types', array('page')),
      '#description' => t('Select the node type from which duplicates are going to be found.'),
    );

    // One field selector per node type, named "<node type>_node_fields" and
    // shown only while that node type is the selected one.
    foreach ($node_types_fields as $machine_name => $node_type_fields) {
      $form[$machine_name . '_node_fields'] = array(
        '#type' => 'select',
        '#title' => t('Select a field from this node type.'),
        '#options' => $node_type_fields,
        '#states' => array(
          'visible' => array(
            ':input[name="remove_duplicates_node_types"]' => array('value' => $machine_name),
          ),
        ),
        '#description' => t('Select the field which is going to be used to find duplicates for this node type .'),
      );
    }

    $options = array(
      0 => t('Display results as a list (with duplicates to remove autoselection)'),
      1 => t('Display results as a table (with duplicates to remove autoselection)'),
      2 => t('Display results as a tableselect (with duplicates to remove manual selection)'),
    );
    $form['remove_duplicates_select_results_layout'] = array(
      '#type' => 'radios',
      '#title' => t('Results layout'),
      '#default_value' => 2,
      '#options' => $options,
      '#description' => t('You can choose between three layouts to display found duplicates.'),
    );

    $form['remove_duplicates_case_sensitive'] = array(
      '#type' => 'checkbox',
      '#title' => t('The search for duplicate nodes IS <b>case sensitive</b>.'),
      '#default_value' => TRUE,
      '#description' => t('If checked, duplicates search will be case sensitive.'),
    );

    // Hidden for layout 2 (tableselect), where removal is selected manually.
    $form['remove_duplicates_prioritize_published'] = array(
      '#type' => 'checkbox',
      '#title' => t('Keep at least one published node.'),
      '#default_value' => TRUE,
      '#states' => array(
        'visible' => array(
          ':input[name="remove_duplicates_select_results_layout"]' => array('!value' => 2),
        ),
        'invisible' => array(
          ':input[name="remove_duplicates_select_results_layout"]' => array('value' => 2),
        ),
      ),
      '#description' => t('At least one published node among the duplicates found will be kept. If unchecked, there will be no status check among duplicates.'),
    );

    // Only relevant to layout 2 (tableselect), which posts the selection.
    $warning = t('PHP settings limit the maximum post size. If you have 1000+ duplicates found, <b>increase</b> your max post size setting to insure that the <b>whole</b> duplicate selection will be sent to the batch.');
    $form['warning'] = array(
      '#type' => 'item',
      '#markup' => '<div class="description"><p>' . $warning . '</p></div>',
      '#states' => array(
        'visible' => array(
          ':input[name="remove_duplicates_select_results_layout"]' => array('value' => 2),
        ),
      ),
    );

    $form['submit'] = array(
      '#type' => 'submit',
      '#value' => t('Find Duplicates'),
    );
    return $form;
  }
  /**
   * Processing the settings form (Processing step 1/2).
   *
   * When the confirm flag is not yet raised, raises it and requests a form
   * rebuild so that the confirmation form gets built next. Does nothing if
   * the flag is already set.
   *
   * @see remove_duplicates_settings_form()
   */
  function remove_duplicates_settings_form_submit_process($form, &$form_state) {
    $already_confirming = !empty($form_state['storage']['confirm']);
    if ($already_confirming) {
      return;
    }
    $form_state['rebuild'] = TRUE;
    $form_state['storage']['confirm'] = TRUE;
  }
  /**
   * Form constructor for the module settings confirmation page (Step 2/2).
   *
   * Carries the step 1 choices over as hidden elements, asks the helper
   * matching the selected layout for its output and stores that output's
   * '#element' under the short key 'r'. When a required value is missing, or
   * the output's '#proceed' entry is empty, the confirm flag is reset and only
   * a "Cancel" link is added; otherwise the form is wrapped by confirm_form().
   *
   * @param array $form
   *   The form array to extend.
   * @param array $form_state
   *   The form state. $form_state['storage']['confirm'] is set to FALSE when
   *   the removal cannot proceed.
   *
   * @return array
   *   The confirmation form.
   *
   * @see remove_duplicates_confirm_settings_form_submit_process()
   *
   * @ingroup forms
   */
  function remove_duplicates_build_confirm_settings_form($form, &$form_state) {
    $values = (isset($form_state['values']) && is_array($form_state['values'])) ? $form_state['values'] : array();

    if (isset($values['remove_duplicates_node_types'])) {
      // Set node type hidden field.
      $node_type_machine_name = $values['remove_duplicates_node_types'];
      $form['remove_duplicates_node_types'] = array(
        '#type' => 'hidden',
        '#value' => $node_type_machine_name,
      );

      $field_key = $node_type_machine_name . '_node_fields';
      if (isset($values[$field_key])) {
        $node_field_machine_name = $values[$field_key];
        $form[$field_key] = array(
          '#type' => 'hidden',
          '#value' => $node_field_machine_name,
        );

        // These three values are validated with isset() further down, so they
        // must not be read unguarded here (that raised undefined index
        // notices in exactly the case those checks exist for).
        $has_priority = isset($values['remove_duplicates_prioritize_published']);
        $has_case = isset($values['remove_duplicates_case_sensitive']);
        $prioritize_published_nodes = $has_priority ? $values['remove_duplicates_prioritize_published'] : FALSE;
        $case_sensitive = $has_case ? $values['remove_duplicates_case_sensitive'] : FALSE;
        // A missing layout falls back to the list layout, like any value
        // other than 1 or 2.
        $layout = isset($values['remove_duplicates_select_results_layout']) ? $values['remove_duplicates_select_results_layout'] : 0;

        // Display found duplicates.
        switch ($layout) {
          case 1:
            $output = _remove_duplicates_get_table_output($node_type_machine_name, $node_field_machine_name, $prioritize_published_nodes, $case_sensitive);
            break;

          case 2:
            $output = _remove_duplicates_get_tableselect_output($node_type_machine_name, $node_field_machine_name, $prioritize_published_nodes, $case_sensitive);
            break;

          default:
            $output = _remove_duplicates_get_list_output($node_type_machine_name, $node_field_machine_name, $prioritize_published_nodes, $case_sensitive);
        }
        // The field name is not very explicit but short to ensure
        // that the post size won't be too big even with a large
        // number of duplicates selected. Previously named :
        // remove_duplicates_duplicates_to_remove
        $form['r'] = $output['#element'];
        // End Of Display.
        if (empty($output['#proceed'])) {
          drupal_set_message(t('Everything went fine. No duplicates were found.'));
          $form_state['storage']['confirm'] = FALSE;
        }

        if ($has_priority) {
          $form['remove_duplicates_prioritize_published'] = array(
            '#type' => 'hidden',
            '#value' => $values['remove_duplicates_prioritize_published'],
          );
        }
        else {
          drupal_set_message(t('Priority not set. No duplicates were deleted.'), 'error');
          $form_state['storage']['confirm'] = FALSE;
        }

        if ($has_case) {
          $form['remove_duplicates_case_sensitive'] = array(
            '#type' => 'hidden',
            '#value' => $values['remove_duplicates_case_sensitive'],
          );
        }
        else {
          drupal_set_message(t('Case sensitivity not set. No duplicates were deleted.'), 'error');
          $form_state['storage']['confirm'] = FALSE;
        }
      }
      else {
        drupal_set_message(t('Node field not set. No duplicates were deleted.'), 'error');
        $form_state['storage']['confirm'] = FALSE;
      }
    }
    else {
      drupal_set_message(t('Node type not set. No duplicates were deleted.'), 'error');
      $form_state['storage']['confirm'] = FALSE;
    }

    if (!empty($form_state['storage']['confirm'])) {
      $form = confirm_form(
        $form,
        t('Are you sure you want to remove duplicates ?'),
        'admin/config/content/remove_duplicates',
        t('Found duplicates are going to be permanently removed.'),
        t('Remove Duplicates'),
        t('Cancel')
      );
    }
    else {
      // Nothing to confirm : only offer a way back to step 1.
      $form['actions']['cancel'] = array(
        '#type' => 'item',
        '#markup' => l(t('Cancel'), 'admin/config/content/remove_duplicates'),
      );
    }
    return $form;
  }
  /**
   * Processing the settings form (Processing step 2/2).
   *
   * If the settings form has been confirmed, validates the submitted node
   * type and field, then registers the removal batch. Each failed check sets
   * an error message and aborts without registering anything.
   */
  function remove_duplicates_confirm_settings_form_submit_process($form, &$form_state) {
    // Not the confirmation step : nothing to do.
    if (empty($form_state['storage']['confirm'])) {
      return;
    }

    if (!isset($form_state['values']['remove_duplicates_node_types'])) {
      drupal_set_message(t('No node type selected. No duplicates were deleted'), 'error');
      return;
    }

    $known_node_types = node_type_get_names();
    $node_type_machine_name = $form_state['values']['remove_duplicates_node_types'];
    if (!isset($known_node_types[$node_type_machine_name])) {
      drupal_set_message(t('Node type selected not found. No duplicates were deleted'), 'error');
      return;
    }

    $field_key = $node_type_machine_name . '_node_fields';
    if (!isset($form_state['values'][$field_key])) {
      drupal_set_message(t('No field selected. No duplicates were deleted'), 'error');
      return;
    }

    // The field must be an instance attached to the node type, or 'title'.
    $field_instances = field_info_instances('node', $node_type_machine_name);
    $node_field_machine_name = $form_state['values'][$field_key];
    $is_field_instance = is_array($field_instances) && isset($field_instances[$node_field_machine_name]);
    if (!$is_field_instance && !($node_field_machine_name == 'title')) {
      drupal_set_message(t('Node field selected not found. No duplicates were deleted'), 'error');
      return;
    }

    $operation_arguments = array(
      $node_type_machine_name,
      $node_field_machine_name,
      !empty($form_state['values']['remove_duplicates_prioritize_published']),
      !empty($form_state['values']['remove_duplicates_case_sensitive']),
      // Manual selection posted under the short key 'r', if any.
      empty($form_state['values']['r']) ? array() : $form_state['values']['r'],
    );
    $batch = array(
      'title' => t('Searching for Duplicates'),
      'operations' => array(
        array('remove_duplicates_batch_operation', $operation_arguments),
      ),
      'progress_message' => t('Work In Progress...'),
      'error_message' => t('An Error Has Occured'),
      'finished' => 'remove_duplicates_batch_finished',
    );
    // Watchdog message should not be wrapped in t().
    // See : https://api.drupal.org/comment/33838#comment-33838
    watchdog('remove_duplicates', 'Batch - Remove Duplicates batch start', array(), WATCHDOG_INFO);
    batch_set($batch);
  }
  /**
   * Operation for batch_set().
   *
   * Each call deletes at most five nodes from the pending list kept in
   * $context['sandbox']['nodes_to_remove']. The list is computed on the first
   * call. When the last nodes have just been deleted, 'finished' is set to
   * 0.99 so that one more call happens; that call finds the list empty, sets
   * 'finished' to 1 and drops the list from the sandbox.
   *
   * @param string $node_type_machine_name
   *   The node type to fetch.
   * @param string $node_field_machine_name
   *   The {field} used to group nodes and therefore create sets
   *   of duplicate nodes.
   * @param bool $prioritize_published_nodes
   *   Passed through to _remove_duplicates_get_nodes_ids_to_remove().
   * @param bool $case_sensitive
   *   If TRUE, duplicates detection is case sensitive
   *   Otherwise, duplicates detection is case insensitive.
   * @param array $nodes_marked_as_removable
   *   [Optional] An array of nids to remove.
   *   Provided when using custom tableselect output.
   * @param array $context
   *   The batch context; 'sandbox', 'finished' and 'results' are updated.
   */
  function remove_duplicates_batch_operation($node_type_machine_name, $node_field_machine_name, $prioritize_published_nodes, $case_sensitive, $nodes_marked_as_removable, &$context) {
    // Reuse the pending list of a previous pass, otherwise run the search.
    if (isset($context['sandbox']['nodes_to_remove'])) {
      $pending = $context['sandbox']['nodes_to_remove'];
    }
    else {
      $pending = _remove_duplicates_get_nodes_ids_to_remove($node_type_machine_name, $node_field_machine_name, $prioritize_published_nodes, $case_sensitive, $nodes_marked_as_removable);
    }

    // First pass : initialise the sandbox.
    if (empty($context['sandbox'])) {
      $context['sandbox'] = array(
        'current' => 0,
        'progress' => 0,
        'max' => count($pending),
        'nodes_to_remove' => $pending,
      );
    }

    // Nothing left : the operation is complete.
    if (empty($pending)) {
      $context['finished'] = 1;
      unset($context['sandbox']['nodes_to_remove']);
      $context['results']['processed'] = $context['sandbox']['progress'];
      return;
    }

    // Take the next chunk, keeping the original array keys.
    $limit = min(5, count($pending));
    $chunk = array_slice($pending, 0, $limit, TRUE);
    if (count($chunk)) {
      $nids = array();
      foreach ($chunk as $node) {
        if (!is_object($node)) {
          continue;
        }
        if ($node->nid) {
          $nids[$node->nid] = $node->nid;
        }
        $log_variables = array(
          '@nid' => (isset($node->nid)) ? $node->nid : NULL,
          '@title' => (isset($node->title)) ? $node->title : NULL,
          '@changed' => (isset($node->changed)) ? format_date($node->changed) : NULL,
          '@created' => (isset($node->created)) ? format_date($node->created) : NULL,
        );
        watchdog('remove_duplicates', 'Batch - Duplicate node @nid deleted : @title | updated on @changed | created on @created.', $log_variables, WATCHDOG_DEBUG);
      }
      node_delete_multiple($nids);
      // Drop the deleted nodes from the pending list.
      foreach ($nids as $nid) {
        if (isset($context['sandbox']['nodes_to_remove'][$nid])) {
          unset($context['sandbox']['nodes_to_remove'][$nid]);
        }
      }
    }

    $context['sandbox']['progress'] += $limit;
    $progress = $context['sandbox']['progress'];
    $max = $context['sandbox']['max'];
    if ($progress != $max && $max != 0) {
      $context['finished'] = $progress / $max;
    }
    else {
      // Not quite 1 : forces one last pass that closes the operation.
      $context['finished'] = 0.99;
    }
    $context['results']['processed'] = $context['sandbox']['progress'];
  }
  /**
   * Callback for batch_set().
   *
   * Reports the outcome of the batch both to the user and to the log. A
   * failed batch is reported as an error (it used to be announced as
   * "Everything went fine"); a successful batch reports how many duplicates
   * were deleted, or that none were.
   *
   * @param bool $success
   *   A boolean indicating whether the batch operation successfully concluded.
   * @param array $results
   *   The results from the batch process. The 'processed' entry, written by
   *   remove_duplicates_batch_operation(), holds the number of nodes handled;
   *   it is treated as 0 when missing.
   * @param array $operations
   *   The batch operations that remained unprocessed. Only relevant if $success
   *   is FALSE.
   *
   * @ingroup callbacks
   */
  function remove_duplicates_batch_finished($success, $results, $operations) {
    // 'processed' is absent when the operation never got to run.
    $processed = (is_array($results) && isset($results['processed'])) ? (int) $results['processed'] : 0;
    $placeholders = array('@processed' => ((string) $processed));

    if (empty($success)) {
      watchdog('remove_duplicates', 'Batch - Remove Duplicates batch ended with an error. @processed duplicates deleted.', $placeholders, WATCHDOG_ERROR);
      drupal_set_message(t('An error occurred while removing duplicates. @processed duplicates deleted.', $placeholders), 'error');
      return;
    }

    if ($processed === 0) {
      $remove_duplicates_message = t('Everything went fine. No duplicates deleted');
      watchdog('remove_duplicates', 'Batch - Remove Duplicates batch end. No duplicates deleted.', array(), WATCHDOG_INFO);
    }
    else {
      $remove_duplicates_message = t('@processed duplicates deleted.', $placeholders);
      watchdog('remove_duplicates', 'Batch - Remove Duplicates batch end. @processed duplicates deleted.', $placeholders, WATCHDOG_INFO);
    }
    drupal_set_message($remove_duplicates_message);
  }
  /**
   * Get all duplicate nodes to remove.
   *
   * Without a manual selection, exactly one node per duplicate group is
   * kept: the first node of the group, or, when published nodes are
   * prioritized and that first node is unpublished, the first published node
   * met in the group. With a manual selection, only the nodes flagged in
   * $nodes_marked_as_removable are returned.
   *
   * @param string $node_type_machine_name
   *   The node type to fetch.
   * @param string $node_field_machine_name
   *   The {field} used to group nodes and therefore create sets of
   *   duplicate nodes.
   * @param bool $prioritize_published_nodes
   *   If TRUE, a published node is kept in preference to an unpublished one.
   *   Otherwise, the first node in a set of duplicate nodes is kept.
   *   Ignored when $nodes_marked_as_removable is not empty.
   * @param bool $case_sensitive
   *   If TRUE, duplicates detection is case sensitive
   *   Otherwise, duplicates detection is case insensitive.
   * @param array $nodes_marked_as_removable
   *   [Optional] Manual selection keyed by nid; a non-empty value marks the
   *   node for removal. Provided when using custom tableselect output.
   *
   * @return array
   *   The node objects to remove, keyed by nid. Empty if there are none.
   */
  function _remove_duplicates_get_nodes_ids_to_remove($node_type_machine_name, $node_field_machine_name, $prioritize_published_nodes, $case_sensitive, $nodes_marked_as_removable = array()) {
    $duplicate_node_groups = _remove_duplicates_get_duplicate_node_groups($node_type_machine_name, $node_field_machine_name, $case_sensitive);
    if (!is_array($duplicate_node_groups)) {
      return array();
    }
    $manual_selection = !empty($nodes_marked_as_removable);

    // Log what the search found and how many removals are expected.
    if (isset($duplicate_node_groups['count']) && is_array($duplicate_node_groups['count'])) {
      $count_nodes = (array_key_exists('nodes', $duplicate_node_groups['count'])) ? $duplicate_node_groups['count']['nodes'] : 0;
      $count_node_groups = (array_key_exists('node_groups', $duplicate_node_groups['count'])) ? $duplicate_node_groups['count']['node_groups'] : 0;
      watchdog('remove_duplicates', 'Batch - Found duplicate nodes : [@count_nodes] | node groups : [@count_node_groups]', array(
        '@count_nodes' => $count_nodes,
        '@count_node_groups' => $count_node_groups,
      ), WATCHDOG_INFO);
      if (!$manual_selection) {
        // One node per group is kept.
        $estimate = $count_nodes - $count_node_groups;
      }
      else {
        // Fewer posted entries than duplicates found : the selection sent by
        // the browser was truncated.
        if (count($nodes_marked_as_removable) < $count_nodes) {
          $remove_duplicates_message = t('All the data from table selection was not recovered. Check out yout PHP settings to increase maximum post size or re-run search and remove operations to delete remaining duplicates.');
          drupal_set_message($remove_duplicates_message, 'error');
          watchdog('remove_duplicates', 'Batch - All the data from table selection was not recovered. Check out yout PHP settings to increase maximum post size.', array(), WATCHDOG_ERROR);
        }
        $estimate = count(array_filter($nodes_marked_as_removable));
      }
      watchdog('remove_duplicates', 'Batch - Duplicate nodes to remove estimate before filtering : [@nodes_to_remove_estimate]', array(
        '@nodes_to_remove_estimate' => $estimate,
      ), WATCHDOG_INFO);
    }

    if (!isset($duplicate_node_groups['data']) || !is_array($duplicate_node_groups['data'])) {
      return array();
    }

    $nodes_to_remove = array();
    foreach ($duplicate_node_groups['data'] as $duplicate_node_group) {
      if (!is_array($duplicate_node_group) || count($duplicate_node_group) < 2) {
        continue;
      }
      // Only records exposing both nid and status can be handled.
      $candidates = array();
      foreach ($duplicate_node_group as $duplicate_node) {
        if (is_object($duplicate_node) && isset($duplicate_node->nid) && isset($duplicate_node->status)) {
          $candidates[] = $duplicate_node;
        }
      }

      if ($manual_selection) {
        foreach ($candidates as $duplicate_node) {
          if (!empty($nodes_marked_as_removable[$duplicate_node->nid])) {
            $nodes_to_remove[$duplicate_node->nid] = $duplicate_node;
          }
        }
        continue;
      }

      if (count($candidates) < 2) {
        continue;
      }
      // Preserving the first node in the current duplicate node group.
      $node_to_keep = array_shift($candidates);
      foreach ($candidates as $duplicate_node) {
        if ($prioritize_published_nodes && $duplicate_node->status == 1 && $node_to_keep->status != 1) {
          // A published duplicate replaces the unpublished node kept so far.
          // The kept node must be updated, otherwise every later published
          // duplicate is compared to the discarded node and survives too.
          $nodes_to_remove[$node_to_keep->nid] = $node_to_keep;
          $node_to_keep = $duplicate_node;
        }
        else {
          $nodes_to_remove[$duplicate_node->nid] = $duplicate_node;
        }
      }
    }

    $nodes_count = count($nodes_to_remove);
    if ($nodes_count) {
      watchdog('remove_duplicates', 'Batch - Duplicate nodes to remove after filtering : [@count_nodes]', array(
        '@count_nodes' => $nodes_count,
      ), WATCHDOG_INFO);
    }
    return $nodes_to_remove;
  }
  /**
   * Get the field common name used in node objects.
   *
   * 'title' (in any letter case) is returned as given. For any other field,
   * the name is the first column listed in the field's FIELD_LOAD_CURRENT
   * SQL storage details, provided that table and column exist in the database.
   *
   * @param string $node_field_machine_name
   *   The {field} used to group nodes and therefore create sets of
   *   duplicate nodes.
   *
   * @return string|null
   *   A field name to use with extracted db records, or NULL when it cannot
   *   be resolved.
   */
  function _remove_duplicates_get_field_common_name($node_field_machine_name) {
    // Basic field.
    if (strtolower($node_field_machine_name) === 'title') {
      return $node_field_machine_name;
    }

    // Custom field.
    $field_info = field_info_field($node_field_machine_name);
    if (empty($field_info['storage']['details']['sql']['FIELD_LOAD_CURRENT'])) {
      return NULL;
    }
    $current_storage = $field_info['storage']['details']['sql']['FIELD_LOAD_CURRENT'];
    $table = key($current_storage);
    $column = current($current_storage[$table]);
    $is_in_database = db_table_exists($table) && db_field_exists($table, $column);
    return $is_in_database ? $column : NULL;
  }
  /**
   * Get all duplicate nodes grouped according to the selected field.
   *
   * @param string $node_type_machine_name
   *   The node type to fetch.
   * @param string $node_field_machine_name
   *   The {field} used to group nodes and therefore create sets of
   *   duplicate nodes.
   * @param bool $case_sensitive
   *   If TRUE, duplicates detection is case sensitive
   *   Otherwise, duplicates detection is case insensitive.
   *
   * @return array
   *   An empty array when the field cannot be resolved to a record property.
   *   Otherwise an array with 2 keys :
   *   - count: an array with the number of duplicate 'nodes' and the number
   *     of 'node_groups' they are spread over.
   *   - data: the duplicate node groups, keyed by the MD5 of the (optionally
   *     lower-cased) field value, each one being a list of record objects.
   */
  function _remove_duplicates_get_duplicate_node_groups($node_type_machine_name, $node_field_machine_name, $case_sensitive) {
    $field = _remove_duplicates_get_field_common_name($node_field_machine_name);
    if (empty($field)) {
      // Historical return value for an unusable field; callers test for the
      // 'count' / 'data' keys before using them.
      return array();
    }

    $duplicate_node_groups = array();
    $nodes_count = 0;

    $records = _remove_duplicates_get_nodes($node_type_machine_name, $node_field_machine_name, $case_sensitive);
    if (is_array($records) && count($records)) {
      // Creating node groups.
      $node_groups = array();
      $grouped_nids = array();
      foreach ($records as $record) {
        if (!is_object($record) || !isset($record->$field)) {
          continue;
        }
        // MD5 unicity magic to group nodes according to field content.
        // The case insensitive key is folded with drupal_strtolower() because
        // strtolower() works on bytes: values that only differ by the case
        // of a non-ASCII letter would otherwise be split into distinct groups.
        $value = (string) $record->$field;
        $group_key = md5($case_sensitive ? $value : drupal_strtolower($value));

        // The same node can come back on several rows (one per field delta
        // or language). It must never be listed as a duplicate of itself.
        if (isset($record->nid)) {
          if (isset($grouped_nids[$group_key][$record->nid])) {
            continue;
          }
          $grouped_nids[$group_key][$record->nid] = TRUE;
        }
        $node_groups[$group_key][] = $record;
      }

      // Keeping only node groups with duplicates.
      foreach ($node_groups as $group_key => $node_group) {
        $node_group_count = count($node_group);
        if ($node_group_count > 1) {
          $nodes_count += $node_group_count;
          $duplicate_node_groups[$group_key] = $node_group;
        }
      }
    }

    return array(
      'count' => array(
        'nodes' => $nodes_count,
        'node_groups' => count($duplicate_node_groups),
      ),
      'data' => $duplicate_node_groups,
    );
  }
  /**
   * Get all nodes of a type whose {field} value is shared with another row.
   *
   * Three queries are chained: an optional counting query (PostgreSQL only),
   * a sub query listing the values met more than once, and a main query
   * joining the nodes (and their author) on those values.
   *
   * @param string $node_type_machine_name
   *   The node type to fetch.
   * @param string $node_field_machine_name
   *   The {field} to join with the fetched node type.
   * @param bool $case_sensitive
   *   If TRUE, duplicates detection is case sensitive
   *   Otherwise, duplicates detection is case insensitive.
   *
   * @return array
   *   The fetched rows, ordered by {field}. Each row exposes nid, uid,
   *   status, created, changed, the author name and {field} (plus the title
   *   when {field} is a custom field). Empty when a custom field has no
   *   usable SQL storage.
   */
  function _remove_duplicates_get_nodes($node_type_machine_name = NULL, $node_field_machine_name = NULL, $case_sensitive = TRUE) {
    // EntityFieldQuery does not support GROUP BY nor DISTINCT
    // (see https://drupal.org/node/1565708): raw database calls are used.
    if (strtolower($node_field_machine_name) == 'title') {
      // Basic title field: the value is a column of the node table itself.
      $column = $node_field_machine_name;
      $on_pgsql = Database::getConnection()->databaseType() == 'pgsql';

      if ($on_pgsql) {
        // In PostgreSQL string comparisons are case sensitive by default.
        // Counting query: number of nodes of the type per exact title.
        $counting = db_select('node', 's');
        $counting->fields('s', array($column));
        $counting->addExpression('COUNT(*)', 'duplicate');
        $counting->condition('s.type', $node_type_machine_name, '=');
        if ($case_sensitive) {
          $counting->groupBy($column);
        }
        else {
          // Expose the lower-cased title too, then wrap the query so the
          // counts are summed per lower-cased title, which is returned under
          // the name of the field.
          $counting->addExpression('lower(s.' . $column . ')', 'lowered');
          $counting->groupBy('lowered');
          $counting->groupBy($column);
          $counting = db_select($counting, 's');
          $counting->addExpression('s.lowered', $column);
          $counting->addExpression('SUM(s.duplicate)', 'duplicate');
          $counting->groupBy('lowered');
        }
        // Sub query: titles counted more than once.
        $repeated = db_select($counting, 'n');
        $repeated->fields('n', array($column));
        $repeated->condition('duplicate', 1, '>');
      }
      else {
        // In MySQL nonbinary string comparisons are case insensitive by
        // default, see
        // https://dev.mysql.com/doc/refman/5.0/en/case-sensitivity.html
        // Grouping on the MD5 checksum of the title makes it case sensitive.
        // Sub query: titles of the type met more than once.
        $repeated = db_select('node', 'n');
        $repeated->fields('n', array($column));
        $repeated->addExpression('COUNT(*)', 'duplicate');
        if ($case_sensitive) {
          $repeated->addExpression('md5(n.' . $column . ')', 'checksum');
          $repeated->groupBy('checksum');
        }
        else {
          $repeated->groupBy($column);
        }
        $repeated->condition('n.type', $node_type_machine_name, '=');
        $repeated->havingCondition('duplicate', 1, '>');
      }

      // Main query: nodes of the type, with their author, whose title
      // matches one of the repeated titles.
      if ($case_sensitive) {
        $match = 'md5(n.' . $column . ') = md5(nn.' . $column . ')';
      }
      elseif ($on_pgsql) {
        $match = 'lower(n.' . $column . ') = lower(nn.' . $column . ')';
      }
      else {
        $match = 'n.' . $column . ' = nn.' . $column;
      }
      $listing = db_select('node', 'n');
      $listing->fields('n', array(
        'nid',
        'uid',
        'status',
        'created',
        'changed',
        $column,
      ));
      $listing->fields('u', array('name'));
      $listing->join('users', 'u', 'n.uid = u.uid');
      $listing->join($repeated, 'nn', $match);
      $listing->condition('n.type', $node_type_machine_name, '=');
      $listing->orderBy($column, 'ASC');
      return $listing->execute()->fetchAll();
    }

    // Custom field: the value lives in the field storage table, which must
    // be described by the field and exist in the database.
    $definition = field_info_field($node_field_machine_name);
    if (empty($definition['storage']['details']['sql']['FIELD_LOAD_CURRENT'])) {
      return array();
    }
    $current_storage = $definition['storage']['details']['sql']['FIELD_LOAD_CURRENT'];
    $table = key($current_storage);
    $column = current($current_storage[$table]);
    if (!db_table_exists($table) || !db_field_exists($table, $column)) {
      return array();
    }

    // Every query joins the field table ("cf") on the bundle, the entity
    // type and the node id; only the alias of the node table changes.
    $join_arguments = array(
      ':bundle' => $node_type_machine_name,
      ':entity_type' => 'node',
    );
    $on_pgsql = Database::getConnection()->databaseType() == 'pgsql';

    if ($on_pgsql) {
      // Counting query: number of field rows of the type per exact value.
      $counting = db_select('node', 's');
      $counting->addJoin('INNER', $table, 'cf', 'cf.bundle = :bundle AND cf.entity_type = :entity_type AND s.nid = cf.entity_id ', $join_arguments);
      $counting->fields('cf', array($column));
      $counting->addExpression('COUNT(*)', 'duplicate');
      $counting->condition('s.type', $node_type_machine_name, '=');
      if ($case_sensitive) {
        $counting->groupBy($column);
      }
      else {
        // Same wrapping as for the title: counts are summed per lower-cased
        // value, which is returned under the name of the field.
        $counting->addExpression('lower(cf.' . $column . ')', 'lowered');
        $counting->groupBy('lowered');
        $counting->groupBy($column);
        $counting = db_select($counting, 's');
        $counting->addExpression('s.lowered', $column);
        $counting->addExpression('SUM(s.duplicate)', 'duplicate');
        $counting->groupBy('lowered');
      }
      // Sub query: values counted more than once.
      $repeated = db_select($counting, 'cf');
      $repeated->fields('cf', array($column));
      $repeated->condition('duplicate', 1, '>');
    }
    else {
      // Sub query: values of the type met more than once, grouped on their
      // MD5 checksum when the detection is case sensitive.
      $repeated = db_select('node', 'n');
      $repeated->addJoin('INNER', $table, 'cf', 'cf.bundle = :bundle AND cf.entity_type = :entity_type AND n.nid = cf.entity_id ', $join_arguments);
      $repeated->condition('n.type', $node_type_machine_name, '=');
      $repeated->fields('cf', array($column));
      $repeated->addExpression('COUNT(*)', 'duplicate');
      if ($case_sensitive) {
        $repeated->addExpression('md5(cf.' . $column . ')', 'checksum');
        $repeated->groupBy('checksum');
      }
      else {
        $repeated->groupBy($column);
      }
      $repeated->havingCondition('duplicate', 1, '>');
    }

    // Main query: nodes of the type, with their author and field value,
    // whose value matches one of the repeated values.
    if ($case_sensitive) {
      $match = 'md5(cf.' . $column . ') = md5(nn.' . $column . ')';
    }
    elseif ($on_pgsql) {
      $match = 'lower(cf.' . $column . ') = lower(nn.' . $column . ')';
    }
    else {
      $match = 'cf.' . $column . ' = nn.' . $column;
    }
    $listing = db_select('node', 'n');
    $listing->fields('n', array(
      'nid',
      'uid',
      'status',
      'created',
      'changed',
      'title',
    ));
    $listing->fields('u', array('name'));
    $listing->fields('cf', array($column));
    $listing->addJoin('INNER', $table, 'cf', 'cf.bundle = :bundle AND cf.entity_type = :entity_type AND n.nid = cf.entity_id ', $join_arguments);
    $listing->join('users', 'u', 'n.uid = u.uid');
    $listing->join($repeated, 'nn', $match);
    $listing->condition('n.type', $node_type_machine_name, '=');
    $listing->orderBy($column, 'ASC');
    return $listing->execute()->fetchAll();
  }
  /**
   * List the fields usable for duplicates detection, per node type.
   *
   * @return array
   *   An array keyed by node type machine name. Each entry maps a field
   *   machine name to its label, starting with the 'title' entry followed by
   *   the field instances attached to the node type.
   */
  function _remove_duplicates_get_node_types_fields() {
    $fields_by_type = array();
    foreach (array_keys(node_type_get_names()) as $type) {
      // Every node type has a title.
      $labels = array('title' => t('Title'));

      $instances = field_info_instances('node', $type);
      if (is_array($instances)) {
        foreach ($instances as $instance) {
          // Instances missing a label or a field name are left out.
          if (isset($instance['label'], $instance['field_name'])) {
            $labels[$instance['field_name']] = $instance['label'];
          }
        }
      }
      $fields_by_type[$type] = $labels;
    }
    return $fields_by_type;
  }
  /**
   * Get a list themed output (Legacy output).
   *
   * @param string $node_type_machine_name
   *   The fetched node type.
   * @param string $node_field_machine_name
   *   The {field} used to find duplicates.
   * @param bool $prioritize_published_nodes
   *   If TRUE, the first published node in a set of duplicate nodes is shown
   *   as kept (the first node when none is published).
   *   Otherwise, the first node in a set of duplicate nodes is shown as kept.
   * @param bool $case_sensitive
   *   If TRUE, duplicates detection is case sensitive
   *   Otherwise, duplicates detection is case insensitive.
   *
   * @return array
   *   An array with 2 keys :
   *   #element An 'item' form element whose #markup is the HTML list of
   *   duplicates, or a "no duplicates" message.
   *   #proceed A boolean indicating whether or not duplicates were found.
   */
  function _remove_duplicates_get_list_output($node_type_machine_name, $node_field_machine_name, $prioritize_published_nodes, $case_sensitive) {
    $node_types = node_type_get_names();
    $node_types_fields = _remove_duplicates_get_node_types_fields();

    // Human readable names, falling back to the machine names instead of
    // reading an undefined index when the type or the field is unknown.
    $node_type_name = isset($node_types[$node_type_machine_name]) ? (string) $node_types[$node_type_machine_name] : (string) $node_type_machine_name;
    $node_field_name = isset($node_types_fields[$node_type_machine_name][$node_field_machine_name]) ? (string) $node_types_fields[$node_type_machine_name][$node_field_machine_name] : (string) $node_field_machine_name;

    $duplicate_node_groups = _remove_duplicates_get_duplicate_node_groups($node_type_machine_name, $node_field_machine_name, $case_sensitive);
    if (is_array($duplicate_node_groups)) {
      if (isset($duplicate_node_groups['count']) && is_array($duplicate_node_groups['count'])) {
        $count_nodes = (array_key_exists('nodes', $duplicate_node_groups['count'])) ? $duplicate_node_groups['count']['nodes'] : 0;
        $count_node_groups = (array_key_exists('node_groups', $duplicate_node_groups['count'])) ? $duplicate_node_groups['count']['node_groups'] : 0;
        watchdog('remove_duplicates', 'Search - Found duplicate nodes : [@count_nodes] | node groups : [@count_node_groups]', array(
          '@count_nodes' => $count_nodes,
          '@count_node_groups' => $count_node_groups,
        ), WATCHDOG_INFO);
        // One node per group is kept, hence the estimate.
        watchdog('remove_duplicates', 'Search - Duplicate nodes to remove estimate : [@nodes_to_remove_estimate]', array(
          '@nodes_to_remove_estimate' => $count_nodes - $count_node_groups,
        ), WATCHDOG_INFO);
      }
      if (isset($duplicate_node_groups['data']) && is_array($duplicate_node_groups['data']) && count($duplicate_node_groups['data'])) {
        // Resolved once for all groups: the lookup can check the database
        // schema and its result does not depend on the group.
        $field = _remove_duplicates_get_field_common_name($node_field_machine_name);

        // Outputs an HTML list of duplicates found.
        $list_output = array();
        foreach ($duplicate_node_groups['data'] as $duplicate_node_group) {
          $nodetypefieldvalue = array();
          $keep_node_nid = $keep_node_status = NULL;
          foreach ($duplicate_node_group as $duplicate_node) {
            // Distinct field values met in the group, for the group title.
            $nodetypefieldvalue[$duplicate_node->$field] = $duplicate_node->$field;
            // Defaults to keeping the first node in the group. If "Keep at
            // least one published node." is checked, the first published
            // node replaces an unpublished candidate.
            if (empty($keep_node_nid) || ($prioritize_published_nodes && $duplicate_node->status && !$keep_node_status)) {
              $keep_node_nid = $duplicate_node->nid;
              $keep_node_status = $duplicate_node->status;
            }
          }

          // Defining the group title ("@" placeholders are escaped by t()).
          $node_group_title = t('Duplicates where "@nodetypefield" = @nodetypefieldvalue', array(
            '@nodetypefieldvalue' => '"' . implode('" ' . t('or') . ' "', $nodetypefieldvalue) . '"',
            '@nodetypefield' => $node_field_name,
          ));

          // One sub list per node: its status and what will happen to it.
          $items = array();
          foreach ($duplicate_node_group as $duplicate_node) {
            $node_title = l($duplicate_node->title, 'node/' . $duplicate_node->nid, array('attributes' => array('onclick' => 'window.open(this.href,parseInt(Math.random()*1000));return false;')));
            $node_status = array();
            if ($duplicate_node->status) {
              $node_status[] = '<span style="color:black;">' . t('Published') . '</span>';
            }
            else {
              $node_status[] = '<span style="color:gray;">' . t('Unpublished') . '</span>';
            }
            if (isset($keep_node_nid) && $keep_node_nid == $duplicate_node->nid) {
              $node_status[] = '<span style="color:green;">' . t('Will be kept.') . '</span>';
            }
            else {
              $node_status[] = '<span style="color:red;">' . t('Will be deleted.') . '</span>';
            }
            $items[] = theme('item_list', array(
              'items' => $node_status,
              'title' => $node_title,
              'type' => 'ul',
              'attributes' => array(),
            ));
          }
          $list_output[] = theme('item_list', array(
            'items' => $items,
            'title' => $node_group_title,
            'type' => 'ul',
            'attributes' => array(),
          ));
        }

        $main_title = t('Duplicates found for node type "@nodetype" according to field "@nodetypefield" :', array(
          '@nodetype' => $node_type_name,
          '@nodetypefield' => $node_field_name,
        ));
        $list_output = theme('item_list', array(
          'items' => $list_output,
          'title' => $main_title,
          'type' => 'ul',
          'attributes' => array(
            'style' => 'font-size:0.9em;',
          ),
        ));
        return array(
          '#proceed' => TRUE,
          '#element' => array(
            '#type' => 'item',
            '#title' => t('Results'),
            '#markup' => $list_output,
          ),
        );
      }
    }

    watchdog('remove_duplicates', 'Search - No duplicates found for node [@node_type_machine_name] according to field [@node_field_machine_name]', array(
      '@node_type_machine_name' => $node_type_machine_name,
      '@node_field_machine_name' => $node_field_machine_name,
    ), WATCHDOG_INFO);

    // Outputs a no duplicates found message.
    $text_output = t('No duplicates for node type "@nodetype" according to field "@nodetypefield".', array(
      '@nodetype' => $node_type_name,
      '@nodetypefield' => $node_field_name,
    ));
    $html_output = '<div class="description"><p><span style="font-weight:bold;color:red;">' . $text_output . '</span></p></div>';
    return array(
      '#proceed' => FALSE,
      '#element' => array(
        '#type' => 'item',
        '#title' => t('Results'),
        '#markup' => $html_output,
      ),
    );
  }
  /**
   * Get a table themed output (Legacy output).
   *
   * @param string $node_type_machine_name
   *   The fetched node type.
   * @param string $node_field_machine_name
   *   The {field} used to find duplicates.
   * @param bool $prioritize_published_nodes
   *   If TRUE, the first published node in a set of duplicate nodes is shown
   *   as kept (the first node when none is published).
   *   Otherwise, the first node in a set of duplicate nodes is shown as kept.
   * @param bool $case_sensitive
   *   If TRUE, duplicates detection is case sensitive
   *   Otherwise, duplicates detection is case insensitive.
   *
   * @return array
   *   An array with 2 keys :
   *   #element An 'item' form element whose #markup is the HTML table of
   *   duplicates, or a "no duplicates" message.
   *   #proceed A boolean indicating whether or not duplicates were found.
   */
  function _remove_duplicates_get_table_output($node_type_machine_name, $node_field_machine_name, $prioritize_published_nodes, $case_sensitive) {
    $node_types = node_type_get_names();
    $node_types_fields = _remove_duplicates_get_node_types_fields();

    // Human readable names, falling back to the machine names instead of
    // reading an undefined index when the type or the field is unknown.
    // They are raw text: escaped by t() "@" placeholders or by check_plain().
    $node_type_name = isset($node_types[$node_type_machine_name]) ? (string) $node_types[$node_type_machine_name] : (string) $node_type_machine_name;
    $node_field_name = isset($node_types_fields[$node_type_machine_name][$node_field_machine_name]) ? (string) $node_types_fields[$node_type_machine_name][$node_field_machine_name] : (string) $node_field_machine_name;

    $duplicate_node_groups = _remove_duplicates_get_duplicate_node_groups($node_type_machine_name, $node_field_machine_name, $case_sensitive);
    if (is_array($duplicate_node_groups)) {
      if (isset($duplicate_node_groups['count']) && is_array($duplicate_node_groups['count'])) {
        $count_nodes = (array_key_exists('nodes', $duplicate_node_groups['count'])) ? $duplicate_node_groups['count']['nodes'] : 0;
        $count_node_groups = (array_key_exists('node_groups', $duplicate_node_groups['count'])) ? $duplicate_node_groups['count']['node_groups'] : 0;
        watchdog('remove_duplicates', 'Search - Found duplicate nodes : [@count_nodes] | node groups : [@count_node_groups]', array(
          '@count_nodes' => $count_nodes,
          '@count_node_groups' => $count_node_groups,
        ), WATCHDOG_INFO);
        // One node per group is kept, hence the estimate.
        watchdog('remove_duplicates', 'Search - Duplicate nodes to remove estimate : [@nodes_to_remove_estimate]', array(
          '@nodes_to_remove_estimate' => $count_nodes - $count_node_groups,
        ), WATCHDOG_INFO);
      }
      if (isset($duplicate_node_groups['data']) && is_array($duplicate_node_groups['data']) && count($duplicate_node_groups['data'])) {
        // Resolved once for all groups: the lookup can check the database
        // schema and its result does not depend on the group.
        $field_common_name = _remove_duplicates_get_field_common_name($node_field_machine_name);

        // Column headers, repeated above the nodes of every group.
        $duplicate_node_group_table_header = array();
        foreach (array(t('remove'), t('title'), t('author'), t('published'), t('updated'), t('created')) as $column_label) {
          $duplicate_node_group_table_header[] = array(
            'header' => TRUE,
            'data' => $column_label,
          );
        }
        $column_count = count($duplicate_node_group_table_header);

        // Construction of duplicate node group tables. Cell data is printed
        // as is by the table theme, so raw text is escaped here.
        $duplicate_node_group_table_rows = array();
        $duplicate_node_group_table_rows[] = array(
          array(
            'header' => TRUE,
            'data' => t('Found Duplicates'),
            'colspan' => $column_count,
          ),
        );
        $duplicate_node_group_table_rows[] = array(
          array(
            'header' => TRUE,
            'data' => t('For node type'),
          ),
          array(
            'data' => check_plain($node_type_name),
          ),
          array(
            'header' => TRUE,
            'data' => t('With field name'),
          ),
          array(
            'colspan' => $column_count - 3,
            'data' => check_plain($node_field_name),
          ),
        );

        // Caption of the first row of every group.
        $node_group_title = t('Where "@nodetypefield" is', array(
          '@nodetypefield' => $node_field_name,
        ));

        foreach ($duplicate_node_groups['data'] as $duplicate_node_group) {
          if (!is_array($duplicate_node_group) || !count($duplicate_node_group)) {
            continue;
          }

          $node_group_field_value = array();
          $keep_node_nid = $keep_node_status = NULL;
          foreach ($duplicate_node_group as $duplicate_node) {
            // Distinct field values met in the group. isset() rather than
            // !empty() so that a legitimate "0" value is displayed.
            if (is_object($duplicate_node) && isset($duplicate_node->$field_common_name)) {
              $raw_value = (string) $duplicate_node->$field_common_name;
              $node_group_field_value[$raw_value] = check_plain($raw_value);
            }
            // Defaults to keeping the first node in the group. If "Keep at
            // least one published node." is checked, the first published
            // node replaces an unpublished candidate.
            if (empty($keep_node_nid) || ($prioritize_published_nodes && $duplicate_node->status && !$keep_node_status)) {
              $keep_node_nid = $duplicate_node->nid;
              $keep_node_status = $duplicate_node->status;
            }
          }
          $node_group_field_value = '"' . implode('" ' . t('or') . ' "', $node_group_field_value) . '"';

          // Defining the group table first rows.
          $duplicate_node_group_table_rows[] = array(
            array(
              'header' => TRUE,
              'data' => $node_group_title,
            ),
            array(
              'data' => $node_group_field_value,
              'colspan' => $column_count - 1,
            ),
          );
          $duplicate_node_group_table_rows[] = $duplicate_node_group_table_header;

          // Construction of duplicate node group table.
          foreach ($duplicate_node_group as $duplicate_node) {
            $is_kept = isset($keep_node_nid) && $keep_node_nid == $duplicate_node->nid;
            $duplicate_node_group_table_rows[] = array(
              // Data for 'remove' column :
              array(
                'data' => $is_kept ? '<span style="color:red;">' . t('No') . '</span>' : '<span style="color:green;">' . t('Yes') . '</span>',
              ),
              // Data for 'title' column (l() escapes the link text) :
              array(
                'data' => l($duplicate_node->title, 'node/' . $duplicate_node->nid, array('attributes' => array('onclick' => 'window.open(this.href,parseInt(Math.random()*1000));return false;'))),
              ),
              // Data for 'author' column :
              array(
                'data' => check_plain($duplicate_node->name),
              ),
              // Data for 'published' column :
              array(
                'data' => $duplicate_node->status ? '<span style="color:black;">' . t('Yes') . '</span>' : '<span style="color:gray;">' . t('No') . '</span>',
              ),
              // Data for 'updated' column :
              array(
                'data' => format_date($duplicate_node->changed),
              ),
              // Data for 'created' column :
              array(
                'data' => format_date($duplicate_node->created),
              ),
            );
          }
          // End of duplicate node group table construction.
        }

        $table_output = theme('table', array(
          'rows' => $duplicate_node_group_table_rows,
        ));
        return array(
          '#proceed' => TRUE,
          '#element' => array(
            '#type' => 'item',
            '#title' => t('Results'),
            '#markup' => $table_output,
          ),
        );
      }
    }

    watchdog('remove_duplicates', 'Search - No duplicates found for node [@node_type_machine_name] according to field [@node_field_machine_name]', array(
      '@node_type_machine_name' => $node_type_machine_name,
      '@node_field_machine_name' => $node_field_machine_name,
    ), WATCHDOG_INFO);

    // Outputs a no duplicates found message.
    $text_output = t('No duplicates for node type "@nodetype" according to field "@nodetypefield".', array(
      '@nodetype' => $node_type_name,
      '@nodetypefield' => $node_field_name,
    ));
    $html_output = '<div class="description"><p><span style="font-weight:bold;color:red;">' . $text_output . '</span></p></div>';
    return array(
      '#proceed' => FALSE,
      '#element' => array(
        '#type' => 'item',
        '#title' => t('Results'),
        '#markup' => $html_output,
      ),
    );
  }
  /**
   * Builds the data structure for the Remove Duplicates tableselect element.
   *
   * Looks up the groups of duplicate nodes for the given node type and field,
   * logs the totals to watchdog, and describes one selectable table per group.
   * In each group, every node except the one that is kept is flagged for
   * removal by default.
   *
   * Security: the cells built here are passed as-is to theme('table') by
   * theme_remove_duplicates_tableselect(), so every user-supplied string put
   * in a cell (duplicated field values, author names) is run through
   * check_plain(). Node titles are escaped by l().
   *
   * @param string $node_type_machine_name
   *   The fetched node type.
   * @param string $node_field_machine_name
   *   The {field} used to find duplicates.
   * @param bool $prioritize_published_nodes
   *   If TRUE, the first published node in a set of duplicate nodes is kept.
   *   Otherwise, the first node in a set of duplicate nodes is kept.
   * @param bool $case_sensitive
   *   If TRUE, duplicates detection is case sensitive.
   *   Otherwise, duplicates detection is case insensitive.
   *
   * @return array
   *   An array with 2 keys:
   *   - #element: A data structure to use with the custom
   *     'remove_duplicates_tableselect' form element, or an 'item' element
   *     carrying a message when no duplicates were found.
   *   - #proceed: A boolean indicating whether or not duplicates were found.
   */
  function _remove_duplicates_get_tableselect_output($node_type_machine_name, $node_field_machine_name, $prioritize_published_nodes, $case_sensitive) {
    $node_types = node_type_get_names();
    $node_types_fields = _remove_duplicates_get_node_types_fields();
    $duplicate_node_groups = _remove_duplicates_get_duplicate_node_groups($node_type_machine_name, $node_field_machine_name, $case_sensitive);

    // Human-readable labels. Fall back to the machine names when a label is
    // missing so building the messages below never reads an undefined index.
    $node_type_label = isset($node_types[$node_type_machine_name]) ? (string) $node_types[$node_type_machine_name] : (string) $node_type_machine_name;
    $node_field_label = isset($node_types_fields[$node_type_machine_name][$node_field_machine_name]) ? (string) $node_types_fields[$node_type_machine_name][$node_field_machine_name] : (string) $node_field_machine_name;

    if (is_array($duplicate_node_groups)) {
      if (isset($duplicate_node_groups['count']) && is_array($duplicate_node_groups['count'])) {
        $count_nodes = isset($duplicate_node_groups['count']['nodes']) ? $duplicate_node_groups['count']['nodes'] : 0;
        $count_node_groups = isset($duplicate_node_groups['count']['node_groups']) ? $duplicate_node_groups['count']['node_groups'] : 0;
        watchdog('remove_duplicates', 'Search - Found duplicate nodes : [@count_nodes] | node groups : [@count_node_groups]', array(
          '@count_nodes' => $count_nodes,
          '@count_node_groups' => $count_node_groups,
        ), WATCHDOG_INFO);
        // One node survives per group, hence nodes minus groups.
        watchdog('remove_duplicates', 'Search - Duplicate nodes to remove estimate : [@nodes_to_remove_estimate]', array(
          '@nodes_to_remove_estimate' => $count_nodes - $count_node_groups,
        ), WATCHDOG_INFO);
      }

      if (!empty($duplicate_node_groups['data']) && is_array($duplicate_node_groups['data'])) {
        // Column headers shared by every duplicate node group table.
        $duplicate_node_group_table_header = array(
          'remove' => t('remove'),
          'title' => t('title'),
          'author' => t('author'),
          'status' => t('published'),
          'updated' => t('updated'),
          'created' => t('created'),
        );
        $column_count = count($duplicate_node_group_table_header);

        // Overall title row followed by one table description per group.
        $duplicate_node_group_tables = array(
          '#header' => array(
            array(
              'header' => TRUE,
              'data' => t('Found Duplicates'),
              'colspan' => $column_count,
            ),
          ),
          '#tables' => array(),
        );

        // Loop-invariant values, computed once for all groups.
        $field_common_name = _remove_duplicates_get_field_common_name($node_field_machine_name);
        $link_options = array('attributes' => array('onclick' => 'window.open(this.href,parseInt(Math.random()*1000));return false;'));

        foreach ($duplicate_node_groups['data'] as $duplicate_node_group) {
          // Keep only usable node records so the loops below never iterate a
          // non-array group or dereference a non-object entry.
          $group_nodes = array();
          if (is_array($duplicate_node_group)) {
            foreach ($duplicate_node_group as $duplicate_node) {
              if (is_object($duplicate_node) && isset($duplicate_node->nid)) {
                $group_nodes[] = $duplicate_node;
              }
            }
          }

          // Group header prefix: "Where <field> is" + the duplicated value(s).
          $node_group_title = NULL;
          $node_group_field_value = NULL;
          if (!empty($group_nodes)) {
            $field_values = array();
            foreach ($group_nodes as $duplicate_node) {
              if (isset($duplicate_node->$field_common_name)) {
                $raw_value = (string) $duplicate_node->$field_common_name;
                // Compare against '' rather than using empty() so that a
                // legitimate value of "0" is still displayed.
                if ($raw_value !== '') {
                  // Keyed by the raw value to list each distinct value once;
                  // escaped because the cell is printed without filtering.
                  $field_values[$raw_value] = check_plain($raw_value);
                }
              }
            }
            $node_group_field_value = '"' . implode('" ' . t('or') . ' "', $field_values) . '"';
            $node_group_title = t('Where "@nodetypefield" is', array(
              '@nodetypefield' => $node_field_label,
            ));
          }

          $duplicate_node_group_table = array(
            '#header' => array(
              '#prefix' => array(
                array(
                  'header' => TRUE,
                  'data' => $node_group_title,
                ),
                array(
                  'data' => $node_group_field_value,
                  'colspan' => $column_count - 1,
                ),
              ),
              '#root' => $duplicate_node_group_table_header,
            ),
            '#value' => array(),
            '#options' => array(),
            '#default_value' => array(),
          );

          // Choose the node to keep: the first node of the group, replaced by
          // the first published node when published nodes are prioritized and
          // the current choice is unpublished.
          $keep_node_nid = NULL;
          $keep_node_status = NULL;
          foreach ($group_nodes as $duplicate_node) {
            $node_status = !empty($duplicate_node->status);
            if (empty($keep_node_nid)) {
              $keep_node_nid = $duplicate_node->nid;
              $keep_node_status = $node_status;
            }
            elseif ($prioritize_published_nodes && $node_status && !$keep_node_status) {
              $keep_node_nid = $duplicate_node->nid;
              $keep_node_status = $node_status;
            }
          }

          // One selectable row per node of the group.
          foreach ($group_nodes as $duplicate_node) {
            $nid = $duplicate_node->nid;
            $is_kept = isset($keep_node_nid) && $keep_node_nid == $nid;

            $duplicate_node_group_table['#value'][$nid] = $nid;
            if (!$is_kept) {
              // Every node but the kept one is pre-selected for removal.
              $duplicate_node_group_table['#default_value'][$nid] = $nid;
            }

            $duplicate_node_group_table['#options'][$nid] = array(
              'remove' => $is_kept
                ? '<span style="color:red;">' . t('No') . '</span>'
                : '<span style="color:green;">' . t('Yes') . '</span>',
              // l() escapes the title text itself.
              'title' => l($duplicate_node->title, 'node/' . $nid, $link_options),
              'author' => isset($duplicate_node->name) ? check_plain($duplicate_node->name) : '',
              'status' => !empty($duplicate_node->status)
                ? '<span style="color:black;">' . t('Yes') . '</span>'
                : '<span style="color:gray;">' . t('No') . '</span>',
              'updated' => format_date($duplicate_node->changed),
              'created' => format_date($duplicate_node->created),
            );
          }

          $duplicate_node_group_tables['#tables'][] = $duplicate_node_group_table;
        }

        // Custom table select element output to use with forms.
        return array(
          '#proceed' => TRUE,
          '#element' => array(
            '#title' => t('Results'),
            '#type' => 'remove_duplicates_tableselect',
            '#options' => $duplicate_node_group_tables,
          ),
        );
      }
    }

    // Nothing to remove: log it and output a "no duplicates found" message.
    watchdog('remove_duplicates', 'Search - No duplicates found for node [@node_type_machine_name] according to field [@node_field_machine_name]', array(
      '@node_type_machine_name' => $node_type_machine_name,
      '@node_field_machine_name' => $node_field_machine_name,
    ), WATCHDOG_INFO);
    $text_output = t('No duplicates for node type "@nodetype" according to field "@nodetypefield".', array(
      '@nodetype' => $node_type_label,
      '@nodetypefield' => $node_field_label,
    ));
    $html_output = '<div class="description"><p><span style="font-weight:bold;color:red;">' . $text_output . '</span></p></div>';
    return array(
      '#proceed' => FALSE,
      '#element' => array(
        '#type' => 'item',
        '#title' => t('Results'),
        '#markup' => $html_output,
      ),
    );
  }
  /**
   * Implements hook_element_info().
   *
   * Declares the 'remove_duplicates_tableselect' form element type: an input
   * element kept as a tree, expanded by
   * remove_duplicates_form_process_tableselect() and rendered through the
   * 'remove_duplicates_tableselect' theme hook.
   */
  function remove_duplicates_element_info() {
    $element_types = array();

    $element_types['remove_duplicates_tableselect'] = array(
      '#tree' => TRUE,
      '#input' => TRUE,
      // No options until a form supplies the duplicate node group tables.
      '#options' => array(),
      '#process' => array('remove_duplicates_form_process_tableselect'),
      '#theme' => 'remove_duplicates_tableselect',
    );

    return $element_types;
  }
  /**
   * Process function for Remove Duplicates tableselect.
   *
   * Expands the element into one 'checkbox' child per row of every table
   * listed in $element['#options']['#tables']. A checkbox starts checked when
   * its key is present in both the table's #value and #default_value arrays.
   *
   * @param array $element
   *   The 'remove_duplicates_tableselect' element being processed.
   * @param array $form_state
   *   The current form state (not used).
   * @param array $complete_form
   *   The complete form structure (not used).
   *
   * @return array
   *   The processed element, with the checkbox children added.
   */
  function remove_duplicates_form_process_tableselect($element, $form_state, $complete_form) {
    // Nothing to expand without at least one table description.
    if (empty($element['#options']['#tables']) || !is_array($element['#options']['#tables'])) {
      return $element;
    }

    $element['#value'] = (isset($element['#value']) && is_array($element['#value'])) ? $element['#value'] : array();

    foreach ($element['#options']['#tables'] as &$sub_element) {
      $sub_element['#tree'] = TRUE;
      $value = (isset($sub_element['#value']) && is_array($sub_element['#value'])) ? $sub_element['#value'] : array();
      // Accumulate every value listed by the tables into the element's value.
      $element['#value'] = array_unique(array_merge($element['#value'], $value));

      if (empty($sub_element['#options']) || !is_array($sub_element['#options'])) {
        // A table without rows contributes no values.
        $sub_element['#value'] = array();
        continue;
      }

      if (!isset($sub_element['#default_value']) || $sub_element['#default_value'] === 0) {
        $sub_element['#default_value'] = array();
      }

      // Create a checkbox for each item in #options in such a way that the
      // value of the tableselect element behaves as if it had been of type
      // checkboxes.
      foreach ($sub_element['#options'] as $key => $choice) {
        // Do not overwrite manually created children.
        if (isset($element[$key])) {
          continue;
        }

        // Only a title cell given as a render array carries a '#title'. The
        // is_array() checks prevent indexing into a plain string cell, which
        // on PHP versions before 5.4 yields the string's first character.
        $title = '';
        if (is_array($choice)
          && isset($choice['title']) && is_array($choice['title'])
          && isset($choice['title']['data']) && is_array($choice['title']['data'])
          && !empty($choice['title']['data']['#title'])) {
          $title = t('Update @title', array(
            '@title' => $choice['title']['data']['#title'],
          ));
        }

        $checked = array();
        if (isset($value[$key]) && isset($sub_element['#default_value'][$key])) {
          $checked = array('checked' => 'checked');
        }

        $element[$key] = array(
          '#type' => 'checkbox',
          '#title' => $title,
          '#title_display' => 'invisible',
          '#return_value' => $key,
          '#default_value' => (!empty($checked) && isset($value[$key])) ? $key : NULL,
          '#attributes' => (isset($sub_element['#attributes']) ? $sub_element['#attributes'] : array()) + $checked,
        );
        // Register the key as a top-level option of the element.
        // NOTE(review): presumably so Form API's allowed-value validation
        // accepts the submitted keys -- confirm.
        $element['#options'][$key] = TRUE;

        if (is_array($choice) && isset($choice['#weight'])) {
          $element[$key]['#weight'] = $choice['#weight'];
        }
      }
    }
    // Break the reference left behind by the by-reference foreach so later
    // writes to $sub_element cannot modify the last table.
    unset($sub_element);

    return $element;
  }
  /**
   * Implements hook_theme().
   *
   * Registers the 'remove_duplicates_tableselect' theme hook, which receives
   * its data as a render element named 'element'. None of the hook arguments
   * are used.
   */
  function remove_duplicates_theme($existing, $type, $theme, $path) {
    $theme_hooks = array();

    $theme_hooks['remove_duplicates_tableselect'] = array(
      'render element' => 'element',
    );

    return $theme_hooks;
  }
  /**
   * Returns HTML for a Remove Duplicates tableselect element.
   *
   * Theme function for the 'remove_duplicates_tableselect' theme hook. All
   * duplicate node group tables are flattened into the rows of one table: an
   * optional overall header row, then for every group its prefix header row,
   * its column header row and one row per selectable item. Each row starts
   * with an extra cell that holds the item's checkbox (empty in header rows).
   *
   * @param array $variables
   *   An associative array containing:
   *   - element: The element to render. Its '#options' key holds '#header'
   *     (cells of the overall header row) and '#tables' (one entry per group,
   *     each with '#header' => array('#prefix' => ..., '#root' => ...) and
   *     '#options' keyed by item).
   *
   * @return string
   *   The output of theme('table'), or an empty string when the element
   *   carries no '#tables' array.
   */
  function theme_remove_duplicates_tableselect($variables) {
    $element = $variables['element'];

    // Always hand a string back to the theme layer, even with nothing to show.
    if (!isset($element['#options']['#tables']) || !is_array($element['#options']['#tables'])) {
      return '';
    }

    // Empty header cell that provides room for the checkboxes in the first
    // table column.
    $checkbox_header_cell = array('header' => TRUE, 'data' => array());

    $rows = array();
    if (!empty($element['#options']['#header']) && is_array($element['#options']['#header'])) {
      array_unshift($element['#options']['#header'], $checkbox_header_cell);
      $rows[] = $element['#options']['#header'];
    }

    foreach ($element['#options']['#tables'] as $sub_element) {
      // Columns of this table, in display order. Reset for every table so a
      // table without a '#root' header never inherits the previous table's
      // columns.
      $header = array();

      if (!empty($sub_element['#header']) && is_array($sub_element['#header'])) {
        if (!empty($sub_element['#header']['#prefix']) && is_array($sub_element['#header']['#prefix'])) {
          array_unshift($sub_element['#header']['#prefix'], $checkbox_header_cell);
          $rows[] = $sub_element['#header']['#prefix'];
        }
        if (!empty($sub_element['#header']['#root']) && is_array($sub_element['#header']['#root'])) {
          $header = $sub_element['#header']['#root'];
          // Turn every column title into a header cell.
          foreach ($sub_element['#header']['#root'] as $fieldname => $title) {
            $sub_element['#header']['#root'][$fieldname] = array(
              'header' => TRUE,
              'data' => $title,
            );
          }
          array_unshift($sub_element['#header']['#root'], $checkbox_header_cell);
          $rows[] = $sub_element['#header']['#root'];
        }
      }

      if (empty($sub_element['#options']) || !is_array($sub_element['#options'])) {
        continue;
      }

      // Generate a table row for each selectable item in #options.
      foreach (element_children($sub_element['#options']) as $key) {
        $row = array('data' => array());
        if (isset($sub_element['#options'][$key]['#attributes'])) {
          $row += $sub_element['#options'][$key]['#attributes'];
        }
        // Render the checkbox element.
        $row['data'][] = drupal_render($element[$key]);
        // As theme_table only maps header and row columns by order,
        // create the correct order by iterating over the header fields.
        // A column missing from the item is rendered as an empty cell.
        foreach ($header as $fieldname => $title) {
          $row['data'][] = isset($sub_element['#options'][$key][$fieldname]) ? $sub_element['#options'][$key][$fieldname] : '';
        }
        $rows[] = $row;
      }
    }

    return theme('table', array(
      'rows' => $rows,
      'attributes' => (isset($element['#attributes']) && is_array($element['#attributes'])) ? $element['#attributes'] : array(),
    ));
  }