xmlsitemap.generate.inc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. <?php
  2. /**
  3. * @file
  4. * Sitemap generation and rebuilding functions for the xmlsitemap module.
  5. *
  6. * @ingroup xmlsitemap
  7. */
  /**
   * Look up the URL alias for an internal Drupal path from a prefetched map.
   *
   * Comparable to drupal_get_path_alias(), except that all aliases for a
   * language are fetched with one query and kept in a static cache, so sitemap
   * generation does not issue one query per link. The language-neutral aliases
   * are loaded once; besides those, only the aliases of the most recently
   * requested language are kept.
   *
   * @param $path
   *   An internal Drupal path.
   * @param $language
   *   A language code to use when looking up the paths.
   *
   * @return
   *   The alias for $language if one exists, otherwise the language-neutral
   *   alias, otherwise $path unchanged.
   */
  function xmlsitemap_get_path_alias($path, $language) {
    static $aliases;
    static $last_language;

    $alias_sql = "SELECT source, alias FROM {url_alias} WHERE language = :language ORDER BY pid";

    // The language-neutral aliases are loaded on first use and never dropped.
    if (!isset($aliases)) {
      $aliases[LANGUAGE_NONE] = db_query($alias_sql, array(':language' => LANGUAGE_NONE))->fetchAllKeyed();
    }

    $is_specific_language = ($language != LANGUAGE_NONE);

    // Swap the cached language-specific aliases when the language changes.
    if ($is_specific_language && $last_language != $language) {
      unset($aliases[$last_language]);
      $aliases[$language] = db_query($alias_sql, array(':language' => $language))->fetchAllKeyed();
      $last_language = $language;
    }

    if ($is_specific_language && isset($aliases[$language][$path])) {
      return $aliases[$language][$path];
    }
    if (isset($aliases[LANGUAGE_NONE][$path])) {
      return $aliases[LANGUAGE_NONE][$path];
    }
    return $path;
  }
  /**
   * Prepare the environment before the sitemap files are regenerated.
   *
   * Tries to raise the PHP memory limit and, when the
   * 'xmlsitemap_developer_mode' variable is enabled, logs the current peak
   * memory usage as a debug message.
   */
  function _xmlsitemap_regenerate_before() {
    // Attempt to increase the memory limit.
    _xmlsitemap_set_memory_limit();

    if (!variable_get('xmlsitemap_developer_mode', 0)) {
      return;
    }

    $replacements = array(
      '@memory-peak' => format_size(memory_get_peak_usage(TRUE)),
    );
    watchdog('xmlsitemap', 'Starting XML sitemap generation. Memory usage: @memory-peak.', $replacements, WATCHDOG_DEBUG);
  }
  /**
   * Report how far peak memory usage has grown since a recorded baseline.
   *
   * The baseline is the value of memory_get_peak_usage(TRUE) at the first call,
   * or at the most recent call made with $start set to TRUE.
   *
   * @param $start
   *   TRUE to reset the baseline to the current peak usage.
   *
   * @return
   *   The current peak usage minus the baseline, in bytes. This is 0 on any
   *   call that (re)sets the baseline.
   */
  function _xmlsitemap_get_memory_usage($start = FALSE) {
    static $baseline;

    $peak = memory_get_peak_usage(TRUE);
    if ($start || !isset($baseline)) {
      $baseline = $peak;
    }

    return $peak - $baseline;
  }
  /**
   * Estimate the PHP memory limit needed for sitemap generation.
   *
   * This is only a guess and ignores the memory used by the currently loaded
   * modules. The estimate is Drupal's minimum memory limit, plus 500 bytes per
   * link in a chunk, plus 250 bytes per URL alias when the
   * 'xmlsitemap_prefetch_aliases' variable is enabled. The result is cached
   * with drupal_static().
   *
   * @return
   *   The estimated memory limit in bytes.
   */
  function _xmlsitemap_get_optimal_memory_limit() {
    $limit = &drupal_static(__FUNCTION__);
    if (isset($limit)) {
      return $limit;
    }

    // Start from the base memory amount provided by the core constant.
    $limit = parse_size(DRUPAL_MINIMUM_PHP_MEMORY_LIMIT);

    // Reserve memory in proportion to the chunk size.
    $limit += xmlsitemap_get_chunk_size() * 500;

    // Reserve memory for the prefetched URL aliases.
    if (variable_get('xmlsitemap_prefetch_aliases', 1)) {
      $alias_count = db_query("SELECT COUNT(pid) FROM {url_alias}")->fetchField();
      $limit += $alias_count * 250;
    }

    return $limit;
  }
  /**
   * Raise the PHP memory limit for sitemap generation if it is too low.
   *
   * The limit is only ever increased, and it is left alone when it cannot be
   * read or when it is already unlimited (-1).
   *
   * @param $new_limit
   *   An optional PHP memory limit in bytes. If not provided, the value of
   *   _xmlsitemap_get_optimal_memory_limit() will be used.
   *
   * @return
   *   The result of ini_set() (the previous limit as a string, or FALSE on
   *   failure) when a change was attempted, otherwise NULL.
   */
  function _xmlsitemap_set_memory_limit($new_limit = NULL) {
    $current_limit = @ini_get('memory_limit');

    // Nothing to do if the limit is unreadable or already unlimited.
    if (!$current_limit || $current_limit == -1) {
      return;
    }

    // Fall back to the calculated optimum only when the caller gave no limit.
    // (The previous check was inverted: it overwrote explicit limits and never
    // filled in the default.)
    if (is_null($new_limit)) {
      $new_limit = _xmlsitemap_get_optimal_memory_limit();
    }

    if (parse_size($current_limit) < $new_limit) {
      return @ini_set('memory_limit', $new_limit);
    }
  }
  /**
   * Generate one page (chunk) of the sitemap.
   *
   * @param $sitemap
   *   The XML sitemap object to generate a page for.
   * @param $page
   *   An integer of the specific page of the sitemap to generate.
   *
   * @return
   *   The value reported by the writer's getSitemapElementCount() after the
   *   page has been written.
   *
   * @throws Exception
   *   Any exception raised while writing the page is logged with
   *   watchdog_exception() and then re-thrown to the caller.
   */
  function xmlsitemap_generate_page(stdClass $sitemap, $page) {
    try {
      $writer = new XMLSitemapWriter($sitemap, $page);
      $writer->startDocument();
      $writer->generateXML();
      $writer->endDocument();
    }
    catch (Exception $e) {
      // Record the failure, then let the caller decide how to handle it. There
      // is deliberately no return here: nothing after the throw can run.
      watchdog_exception('xmlsitemap', $e);
      throw $e;
    }

    return $writer->getSitemapElementCount();
  }
  /**
   * Write the links of one sitemap chunk to an XML sitemap writer.
   *
   * Reads the rows of the {xmlsitemap} table that have access = 1 and
   * status = 1, ordered by language (descending) and then by location, limited
   * to the requested chunk. Each row is turned into a 'url' element and handed
   * to the writer. A link whose generated URL equals the previously generated
   * URL is skipped as a duplicate.
   *
   * @param $sitemap
   *   The XML sitemap object. Its uri['options'] provide the base URL options,
   *   and the object is attached to the query as 'sitemap' metadata.
   * @param $writer
   *   The writer that receives each element via writeSitemapElement().
   * @param $chunk
   *   The chunk number to generate. Values below 1 start at offset 0.
   *
   * @return
   *   The number of links written, not counting skipped duplicates.
   */
  function xmlsitemap_generate_chunk(stdClass $sitemap, XMLSitemapWriter $writer, $chunk) {
    $lastmod_format = variable_get('xmlsitemap_lastmod_format', XMLSITEMAP_LASTMOD_MEDIUM);

    // Sitemap-specific URL options win; these are only defaults.
    $url_options = $sitemap->uri['options'];
    $url_options += array(
      'absolute' => TRUE,
      'base_url' => variable_get('xmlsitemap_base_url', $GLOBALS['base_url']),
      'language' => language_default(),
      'alias' => variable_get('xmlsitemap_prefetch_aliases', TRUE),
    );

    $query = db_select('xmlsitemap', 'x')
      ->fields('x', array('loc', 'lastmod', 'changefreq', 'changecount', 'priority', 'language', 'access', 'status'))
      ->condition('x.access', 1)
      ->condition('x.status', 1)
      ->orderBy('x.language', 'DESC')
      ->orderBy('x.loc')
      ->addTag('xmlsitemap_generate')
      ->addMetaData('sitemap', $sitemap);

    $offset = max($chunk - 1, 0) * xmlsitemap_get_chunk_size();
    $limit = xmlsitemap_get_chunk_size();
    $links = $query->range($offset, $limit)->execute();

    $written = 0;
    $previous_url = '';

    while ($link = $links->fetchAssoc()) {
      // Replace the language code with a language object.
      if ($link['language'] != LANGUAGE_NONE) {
        $link['language'] = xmlsitemap_language_load($link['language']);
      }
      else {
        $link['language'] = $url_options['language'];
      }

      if ($url_options['alias']) {
        $link['loc'] = xmlsitemap_get_path_alias($link['loc'], $link['language']->language);
      }

      // @todo Add a separate hook_xmlsitemap_link_url_alter() here?
      $per_link_options = array(
        'language' => $link['language'],
        'xmlsitemap_link' => $link,
        'xmlsitemap_sitemap' => $sitemap,
      );
      $link_url = url($link['loc'], $per_link_options + $url_options);

      // Skip this link if it was a duplicate of the last one.
      // @todo Figure out a way to do this before generation so we can report
      // back to the user about this.
      if ($link_url == $previous_url) {
        continue;
      }
      $previous_url = $link_url;
      // Keep track of the total number of links written.
      $written++;

      $element = array('loc' => $link_url);

      if ($link['lastmod']) {
        $element['lastmod'] = gmdate($lastmod_format, $link['lastmod']);
        // Average the stored changefreq with the link's age so that a link
        // with a short changefreq that was last updated long ago shows decay.
        // abs() covers items created during this same run, whose lastmod can
        // be greater than REQUEST_TIME.
        $age = abs(REQUEST_TIME - $link['lastmod']);
        $link['changefreq'] = ($age + $link['changefreq']) / 2;
      }

      if ($link['changefreq']) {
        $element['changefreq'] = xmlsitemap_get_changefreq($link['changefreq']);
      }

      // A priority of 0.5 is the assumed default per the sitemaps.org
      // specification, so it is not written out.
      if (isset($link['priority']) && $link['priority'] != 0.5) {
        $element['priority'] = number_format($link['priority'], 1);
      }

      $writer->writeSitemapElement('url', $element);
    }

    return $written;
  }
  /**
   * Generate the index sitemap.
   *
   * @param $sitemap
   *   The XML sitemap object to generate the index for.
   *
   * @return
   *   The value reported by the writer's getSitemapElementCount() after the
   *   index has been written.
   *
   * @throws Exception
   *   Any exception raised while writing the index is logged with
   *   watchdog_exception() and then re-thrown to the caller.
   */
  function xmlsitemap_generate_index(stdClass $sitemap) {
    try {
      $writer = new XMLSitemapIndexWriter($sitemap);
      $writer->startDocument();
      $writer->generateXML();
      $writer->endDocument();
    }
    catch (Exception $e) {
      // Record the failure, then let the caller decide how to handle it. There
      // is deliberately no return here: nothing after the throw can run.
      watchdog_exception('xmlsitemap', $e);
      throw $e;
    }

    return $writer->getSitemapElementCount();
  }
  213. // BATCH OPERATIONS ------------------------------------------------------------
  /**
   * Batch information callback for regenerating the sitemap files.
   *
   * The batch sets the 'xmlsitemap_regenerate_needed' flag, runs the
   * pre-generation step, generates the pages and then the index of every
   * requested sitemap, and finally clears the flag again.
   *
   * @param $smids
   *   An optional array of XML sitemap IDs. If not provided, it will load all
   *   existing XML sitemaps.
   *
   * @return
   *   A batch definition array.
   */
  function xmlsitemap_regenerate_batch(array $smids = array()) {
    if (empty($smids)) {
      $smids = db_query("SELECT smid FROM {xmlsitemap_sitemap}")->fetchCol();
    }

    $operations = array();

    // Set the regenerate flag in case something fails during file generation.
    $operations[] = array('xmlsitemap_batch_variable_set', array(array('xmlsitemap_regenerate_needed' => TRUE)));

    // @todo Get rid of this batch operation.
    $operations[] = array('_xmlsitemap_regenerate_before', array());

    // Generate all the sitemap pages for each context.
    foreach ($smids as $smid) {
      $operations[] = array('xmlsitemap_regenerate_batch_generate', array($smid));
      $operations[] = array('xmlsitemap_regenerate_batch_generate_index', array($smid));
    }

    // Clear the regeneration flag.
    $operations[] = array('xmlsitemap_batch_variable_set', array(array('xmlsitemap_regenerate_needed' => FALSE)));

    return array(
      'operations' => $operations,
      'finished' => 'xmlsitemap_regenerate_batch_finished',
      'title' => t('Regenerating Sitemap'),
      'file' => drupal_get_path('module', 'xmlsitemap') . '/xmlsitemap.generate.inc',
    );
  }
  /**
   * Batch callback; generate all pages of a sitemap.
   *
   * Each invocation generates one page. The sitemap object, with its running
   * page and link counts, is kept in the batch sandbox between invocations.
   * When a page comes back with no links, the surplus file is removed, the
   * totals are saved on the sitemap and the operation completes.
   *
   * @param $smid
   *   The ID of the XML sitemap to generate.
   * @param $context
   *   The batch context array.
   */
  function xmlsitemap_regenerate_batch_generate($smid, array &$context) {
    if (!isset($context['sandbox']['sitemap'])) {
      // First invocation: load the sitemap and reset its counters.
      $loaded = xmlsitemap_sitemap_load($smid);
      $loaded->chunks = 1;
      $loaded->links = 0;
      $context['sandbox']['sitemap'] = $loaded;
      $context['sandbox']['max'] = XMLSITEMAP_MAX_SITEMAP_LINKS;

      // Clear the cache directory for this sitemap before generating any files.
      xmlsitemap_check_directory($context['sandbox']['sitemap']);
      xmlsitemap_clear_directory($context['sandbox']['sitemap']);
    }

    $sitemap = &$context['sandbox']['sitemap'];
    $links = xmlsitemap_generate_page($sitemap, $sitemap->chunks);

    $page_url_options = $sitemap->uri['options'] + array('query' => array('page' => $sitemap->chunks));
    $context['message'] = t('Now generating %sitemap-url.', array('%sitemap-url' => url('sitemap.xml', $page_url_options)));

    if (!$links) {
      // Cleanup the 'extra' empty file.
      $file = xmlsitemap_sitemap_get_file($sitemap, $sitemap->chunks);
      if (file_exists($file) && $sitemap->chunks > 1) {
        file_unmanaged_delete($file);
      }
      $sitemap->chunks--;

      // Save the updated chunks and links values.
      $context['sandbox']['max'] = $sitemap->chunks;
      $sitemap->updated = REQUEST_TIME;
      xmlsitemap_sitemap_get_max_filesize($sitemap);
      xmlsitemap_sitemap_save($sitemap);
    }
    else {
      // The page had links: count them and move on to the next page.
      $sitemap->links += $links;
      $sitemap->chunks++;
    }

    // Report partial progress until the page counter reaches the maximum.
    if ($sitemap->chunks != $context['sandbox']['max']) {
      $context['finished'] = $sitemap->chunks / $context['sandbox']['max'];
    }
  }
  /**
   * Batch callback; generate the index page of a sitemap.
   *
   * An index is only generated when the sitemap has more than one page.
   *
   * @param $smid
   *   The ID of the XML sitemap to generate the index for.
   * @param $context
   *   The batch context array.
   */
  function xmlsitemap_regenerate_batch_generate_index($smid, array &$context) {
    $sitemap = xmlsitemap_sitemap_load($smid);

    // Single-page sitemaps do not get an index.
    if (!($sitemap->chunks > 1)) {
      return;
    }

    xmlsitemap_generate_index($sitemap);
    $index_url = url('sitemap.xml', $sitemap->uri['options']);
    $context['message'] = t('Now generating sitemap index %sitemap-url.', array('%sitemap-url' => $index_url));
  }
  /**
   * Batch callback; sitemap regeneration finished.
   *
   * Regeneration counts as successful only if the batch succeeded and the
   * 'xmlsitemap_regenerate_needed' flag has been cleared. On success the
   * generation time is stored and a notice is logged; otherwise an error
   * message is shown.
   */
  function xmlsitemap_regenerate_batch_finished($success, $results, $operations, $elapsed) {
    $regenerated = $success && !variable_get('xmlsitemap_regenerate_needed', FALSE);

    if (!$regenerated) {
      drupal_set_message(t('The sitemaps were not successfully regenerated.'), 'error');
      return;
    }

    variable_set('xmlsitemap_generated_last', REQUEST_TIME);

    // Log that the sitemap was regenerated, with timing and memory details.
    $replacements = array(
      '@elapsed' => $elapsed,
      '@memory-peak' => format_size(memory_get_peak_usage(TRUE)),
    );
    watchdog('xmlsitemap', 'Finished XML sitemap generation in @elapsed. Memory usage: @memory-peak.', $replacements, WATCHDOG_NOTICE);
  }
  /**
   * Batch information callback for rebuilding the sitemap data.
   *
   * The batch sets the 'xmlsitemap_rebuild_needed' flag, purges the existing
   * links, runs each link type's rebuild callback, clears the flag, and then
   * runs the operations of the regeneration batch.
   *
   * @param $entities
   *   An array of link types to rebuild.
   * @param $save_custom
   *   Passed (as a boolean) to xmlsitemap_rebuild_batch_clear(), which keeps
   *   links with overridden status or priority when it is TRUE.
   *
   * @return
   *   A batch definition array.
   */
  function xmlsitemap_rebuild_batch(array $entities, $save_custom = FALSE) {
    $batch = array(
      'operations' => array(),
      'finished' => 'xmlsitemap_rebuild_batch_finished',
      'title' => t('Rebuilding Sitemap'),
      'file' => drupal_get_path('module', 'xmlsitemap') . '/xmlsitemap.generate.inc',
    );

    $rebuild_operations = array(
      // Set the rebuild flag in case something fails during the rebuild.
      array('xmlsitemap_batch_variable_set', array(array('xmlsitemap_rebuild_needed' => TRUE))),
      // Purge any links first.
      array('xmlsitemap_rebuild_batch_clear', array($entities, (bool) $save_custom)),
    );

    // Fetch all the sitemap links and save them to the {xmlsitemap} table.
    foreach ($entities as $entity) {
      $info = xmlsitemap_get_link_info($entity);
      $rebuild_operations[] = array($info['xmlsitemap']['rebuild callback'], array($entity));
    }

    // Clear the rebuild flag.
    $rebuild_operations[] = array('xmlsitemap_batch_variable_set', array(array('xmlsitemap_rebuild_needed' => FALSE)));

    // Append the operations of the regeneration batch.
    $regenerate_batch = xmlsitemap_regenerate_batch();
    $batch['operations'] = array_merge($rebuild_operations, $regenerate_batch['operations']);

    return $batch;
  }
  /**
   * Batch callback; set an array of variables and their values.
   *
   * @param $variables
   *   An array of values to store with variable_set(), keyed by variable name.
   */
  function xmlsitemap_batch_variable_set(array $variables) {
    foreach (array_keys($variables) as $name) {
      variable_set($name, $variables[$name]);
    }
  }
  /**
   * Batch callback; clear sitemap links for entities.
   *
   * @param $entities
   *   An array of link types whose rows are deleted from {xmlsitemap}. Nothing
   *   is deleted when the array is empty.
   * @param $save_custom
   *   If TRUE, only links with no status override and no priority override are
   *   deleted, so customized links survive.
   * @param $context
   *   The batch context array.
   */
  function xmlsitemap_rebuild_batch_clear(array $entities, $save_custom, &$context) {
    if (!empty($entities)) {
      $delete = db_delete('xmlsitemap')->condition('type', $entities);

      // If we want to save the custom data, make sure to exclude any links
      // that are not using default inclusion or priority.
      if ($save_custom) {
        $delete->condition('status_override', 0)->condition('priority_override', 0);
      }

      $delete->execute();
    }

    $context['message'] = t('Purging links.');
  }
  /**
   * Batch callback; fetch and add the sitemap links for a specific entity.
   *
   * Each invocation processes up to 20 entities whose ID is greater than the
   * last processed ID, passing their IDs to the link type's 'process callback'.
   * The total is counted once, on the first invocation.
   *
   * @param $entity
   *   The entity type to process.
   * @param $context
   *   The batch context array. The sandbox keeps the link info, the number of
   *   processed items, the last processed ID and the total count.
   */
  function xmlsitemap_rebuild_batch_fetch($entity, &$context) {
    if (!isset($context['sandbox']['info'])) {
      $context['sandbox']['info'] = xmlsitemap_get_link_info($entity);
      $context['sandbox']['progress'] = 0;
      $context['sandbox']['last_id'] = 0;
    }
    $info = $context['sandbox']['info'];

    $query = new EntityFieldQuery();
    $query->entityCondition('entity_type', $entity);
    $query->entityCondition('entity_id', $context['sandbox']['last_id'], '>');
    $query->addTag('xmlsitemap_link_bundle_access');
    $query->addTag('xmlsitemap_rebuild');
    $query->addMetaData('entity', $entity);
    $query->addMetaData('entity_info', $info);

    if (!isset($context['sandbox']['max'])) {
      $count_query = clone $query;
      $count_query->count();
      $context['sandbox']['max'] = $count_query->execute();
      if (!$context['sandbox']['max']) {
        // If there are no items to process, skip everything else.
        return;
      }
    }

    // PostgreSQL cannot have the ORDERED BY in the count query.
    $query->entityOrderBy('entity_id');

    $limit = 20; //variable_get('xmlsitemap_batch_limit', 100)
    $query->range(0, $limit);

    $result = $query->execute();

    // The total was counted once, up front. If this query comes back empty
    // (for example because entities were deleted in the meantime), progress
    // can never reach that total, so finish now rather than failing on the
    // missing result key or repeating this operation forever.
    if (empty($result[$entity])) {
      $context['finished'] = 1;
      return;
    }

    $ids = array_keys($result[$entity]);
    $info['xmlsitemap']['process callback']($ids);

    $context['sandbox']['last_id'] = end($ids);
    $context['sandbox']['progress'] += count($ids);
    $context['message'] = t('Now processing %entity @last_id (@progress of @count).', array('%entity' => $entity, '@last_id' => $context['sandbox']['last_id'], '@progress' => $context['sandbox']['progress'], '@count' => $context['sandbox']['max']));

    if ($context['sandbox']['progress'] >= $context['sandbox']['max']) {
      $context['finished'] = 1;
    }
    else {
      $context['finished'] = $context['sandbox']['progress'] / $context['sandbox']['max'];
    }
  }
  /**
   * Batch callback; sitemap rebuild finished.
   *
   * The rebuild counts as successful only if the batch succeeded and the
   * 'xmlsitemap_rebuild_needed' flag has been cleared.
   */
  function xmlsitemap_rebuild_batch_finished($success, $results, $operations, $elapsed) {
    $rebuilt = $success && !variable_get('xmlsitemap_rebuild_needed', FALSE);

    if (!$rebuilt) {
      drupal_set_message(t('The sitemap links were not successfully rebuilt.'), 'error');
      return;
    }

    drupal_set_message(t('The sitemap links were rebuilt.'));
  }
  /**
   * List the link types whose sitemap links can be rebuilt.
   *
   * A link type qualifies when it defines a 'rebuild callback' and, if it has
   * a bundle entity key, at least one of its bundles is enabled.
   *
   * @return
   *   An array of link type names.
   */
  function xmlsitemap_get_rebuildable_link_types() {
    $rebuildable_types = array();

    foreach (xmlsitemap_get_link_info() as $type => $definition) {
      // A type needs a rebuild callback. A type with bundles also needs at
      // least one enabled bundle, since rebuilding would not produce any links
      // otherwise.
      $is_rebuildable = !empty($definition['xmlsitemap']['rebuild callback'])
        && (empty($definition['entity keys']['bundle']) || xmlsitemap_get_link_type_enabled_bundles($type));

      if ($is_rebuildable) {
        $rebuildable_types[] = $type;
      }
    }

    return $rebuildable_types;
  }