xmlsitemap.inc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. <?php
  2. // $Id: xmlsitemap.inc,v 1.14 2010/01/23 23:19:08 davereid Exp $
  3. /**
  4. * @file
  5. * Miscellaneous functions for the xmlsitemap module.
  6. *
  7. * @ingroup xmlsitemap
  8. */
  9. /**
  10. * Given an internal Drupal path, return the alias for the path.
  11. *
  12. * This is similar to drupal_get_path_alias(), but designed to fetch all alises
  13. * at once so that only one database query is executed instead of several or
  14. * possibly thousands during sitemap generation.
  15. *
  16. * @param $path
  17. * An internal Drupal path.
  18. * @param $language
  19. * A language code to look use when looking up the paths.
  20. */
  21. function xmlsitemap_get_path_alias($path, $language) {
  22. static $aliases;
  23. static $last_language;
  24. if (!isset($aliases)) {
  25. $aliases[LANGUAGE_NONE] = db_query("SELECT source, alias FROM {url_alias} WHERE language = :language ORDER BY pid", array(':language' => LANGUAGE_NONE))->fetchAllKeyed();
  26. }
  27. if ($language != LANGUAGE_NONE && $last_language != $language) {
  28. unset($aliases[$last_language]);
  29. $aliases[$language] = db_query("SELECT source, alias FROM {url_alias} WHERE language = :language ORDER BY pid", array(':language' => $language))->fetchAllKeyed();
  30. $last_language = $language;
  31. }
  32. if ($language != LANGUAGE_NONE && isset($aliases[$language][$path])) {
  33. return $aliases[$language][$path];
  34. }
  35. elseif (isset($aliases[LANGUAGE_NONE][$path])) {
  36. return $aliases[LANGUAGE_NONE][$path];
  37. }
  38. else {
  39. return $path;
  40. }
  41. }
  42. /**
  43. * Delete and regenerate the sitemap files.
  44. */
  45. function xmlsitemap_regenerate() {
  46. _xmlsitemap_regenerate_before();
  47. // Generate the sitemap pages.
  48. $chunk_count = xmlsitemap_get_chunk_count(TRUE);
  49. if ($chunk_count > 1) {
  50. // If we have more than one chunk, we need to increment this value by one
  51. // since the index page (chunk 0) will also need to be generated.
  52. $chunk_count++;
  53. }
  54. foreach (xmlsitemap_var('languages') as $language) {
  55. for ($i = 0; $i < $chunk_count; $i++) {
  56. xmlsitemap_generate($i, xmlsitemap_language_load($language));
  57. }
  58. }
  59. _xmlsitemap_regenerate_after();
  60. }
  61. /**
  62. * Perform operations before rebuilding the sitemap.
  63. */
  64. function _xmlsitemap_regenerate_before() {
  65. // Attempt to increase the available processing time and memory limit.
  66. drupal_set_time_limit(240);
  67. _xmlsitemap_set_memory_limit();
  68. // Set a timer so we can track how long this takes.
  69. timer_start('xmlsitemap_regenerate');
  70. // Get the current memory usage so we can track how much memory is used.
  71. _xmlsitemap_get_memory_usage(TRUE);
  72. // Clear all cached sitemap files.
  73. xmlsitemap_clear_directory();
  74. xmlsitemap_check_directory();
  75. }
  76. function _xmlsitemap_get_memory_usage($start = FALSE) {
  77. static $memory_start;
  78. $current = memory_get_peak_usage(TRUE);
  79. if (!isset($memory_start) || $start) {
  80. $memory_start = $current;
  81. }
  82. return $current - $memory_start;
  83. }
  84. function _xmlsitemap_get_optimal_memory_limit() {
  85. static $optimal_limit;
  86. if (!isset($optimal_limit)) {
  87. // Set the base memory amount from the provided core constant.
  88. $optimal_limit = parse_size(DRUPAL_MINIMUM_PHP_MEMORY_LIMIT);
  89. // Add memory based on the chunk size.
  90. $optimal_limit += xmlsitemap_get_chunk_size() * 500;
  91. // Add memory for storing the url aliases.
  92. $aliases = db_query("SELECT COUNT(pid) FROM {url_alias}")->fetchField();
  93. $optimal_limit += $aliases * 250;
  94. }
  95. return $optimal_limit;
  96. }
  97. /**
  98. * Calculate the optimal memory level for sitemap generation.
  99. */
  100. function _xmlsitemap_set_memory_limit() {
  101. $memory_limit = @ini_get('memory_limit');
  102. if ($memory_limit && $memory_limit != -1) {
  103. $optimal_limit = _xmlsitemap_get_optimal_memory_limit();
  104. if (parse_size($memory_limit) < $optimal_limit) {
  105. @ini_set('memory_limit', $optimal_limit);
  106. }
  107. }
  108. }
  109. /**
  110. * Perform operations after rebuilding the sitemap.
  111. */
  112. function _xmlsitemap_regenerate_after() {
  113. // Show a watchdog message that the sitemap was regenerated.
  114. watchdog('xmlsitemap',
  115. 'XML sitemap files regenerated in @timer ms. Peak memory usage: @memory-peak.',
  116. array(
  117. '@timer' => timer_read('xmlsitemap_regenerate'),
  118. '@memory-peak' => format_size(memory_get_peak_usage(TRUE)),
  119. ),
  120. WATCHDOG_NOTICE,
  121. l(t('View sitemap'), 'sitemap.xml')
  122. );
  123. // Unset the regenerate flag.
  124. variable_set('xmlsitemap_regenerate_needed', FALSE);
  125. // If the chunk count has changed, we will need to rebuild the menu.
  126. variable_set('menu_rebuild_needed', TRUE);
  127. variable_set('xmlsitemap_generated_last', REQUEST_TIME);
  128. }
  129. /**
  130. * Fetch the data from {xmlsitemap}, generates the sitemap, then caches it.
  131. *
  132. * @param $chunk
  133. * An integer representing the integer of the sitemap page chunk.
  134. * @param $language
  135. * A language object, defaults to the default language.
  136. * @return
  137. * TRUE on success; otherwise FALSE
  138. *
  139. * @todo Revise/simplify or remove the function.
  140. */
  141. function xmlsitemap_generate($chunk = 0, $language = NULL) {
  142. if (!is_numeric($chunk) || $chunk > xmlsitemap_get_chunk_count()) {
  143. // Don't bother translating this string.
  144. trigger_error('Improper condition hit in xmlsitemap_generate(). Chunk: ' . $chunk . ', Chunk Count: ' . xmlsitemap_get_chunk_count());
  145. return FALSE;
  146. }
  147. if (!isset($language)) {
  148. $language = language_default();
  149. }
  150. $file = xmlsitemap_get_chunk_file($chunk, $language->language);
  151. if (!$handle = fopen($file, 'wb')) {
  152. trigger_error(t('Could not open file @file for writing.', array('@file' => $file)));
  153. return FALSE;
  154. }
  155. $status = TRUE;
  156. if (xmlsitemap_get_chunk_count() > 1 && !$chunk) {
  157. xmlsitemap_generate_index($handle, $status, $language);
  158. }
  159. else {
  160. xmlsitemap_generate_chunk($handle, $status, $chunk, $language);
  161. }
  162. fclose($handle);
  163. if (!$status) {
  164. trigger_error(t('Unknown error occurred while writing to file @file.', array('@file' => $file)));
  165. }
  166. elseif (xmlsitemap_var('gz')) {
  167. $file_gz = xmlsitemap_get_chunk_file($chunk, $language->language, 'gz');
  168. file_put_contents($file_gz, gzencode(file_get_contents($file), 9));
  169. }
  170. return $status;
  171. }
  172. //function xmlsitemap_fwrite($handle, &$status, $string) {
  173. // $status &= (bool) fwrite($handle, $string);
  174. //}
  175. /**
  176. * Write the proper XML sitemap header.
  177. *
  178. * @param $type
  179. * @param $handle
  180. * A file system pointer resource that is typically created using fopen().
  181. * @param $status
  182. * @param $language
  183. */
  184. function xmlsitemap_generate_chunk_header($type, $handle, &$status, $language) {
  185. $output = '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL;
  186. // Add the stylesheet link.
  187. if (xmlsitemap_var('xsl')) {
  188. $xsl_url = url('sitemap.xsl', array('language' => $language, 'base_url' => xmlsitemap_var('base_url')));
  189. $output .= '<?xml-stylesheet type="text/xsl" href="' . $xsl_url . '"?>' . PHP_EOL;
  190. }
  191. $output .= '<' . $type . ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL;
  192. // This is the full XML header required for schema validation.
  193. //$schemas = array('sitemapindex' => 'siteindex.xsd', 'urlset' => 'sitemap.xsd');
  194. //$output .= '<' . $type . ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' . PHP_EOL;
  195. //$output .= ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . PHP_EOL;
  196. //$output .= ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9' . PHP_EOL;
  197. //$output .= ' http://www.sitemaps.org/schemas/sitemap/0.9/' . $schemas[$type] . '">' . PHP_EOL;
  198. $status &= (bool) fwrite($handle, $output);
  199. return $status;
  200. }
  201. /**
  202. * Generate one page (chunk) of the sitemap.
  203. *
  204. * @param $handle
  205. * A file system pointer resource that is typically created using fopen().
  206. * @param $status
  207. * A boolean that will be altered by reference with the success status of
  208. * writing to $handle.
  209. * @param $chunk
  210. * An integer representing the integer of the sitemap page chunk.
  211. * @param $language
  212. * A language object for the sitemap chunk.
  213. */
  214. function xmlsitemap_generate_chunk($handle, &$status, $chunk, $language) {
  215. $last_url = '';
  216. $url_options = xmlsitemap_get_url_options(array('alias' => TRUE));
  217. $query = db_select('xmlsitemap', 'x');
  218. $query->fields('x', array('loc', 'lastmod', 'changefreq', 'changecount', 'priority', 'language', 'access', 'status'));
  219. $query->condition('access', 1);
  220. $query->condition('status', 1);
  221. $query->orderBy('language', 'DESC');
  222. $query->orderBy('loc');
  223. $query->addTag('xmlsitemap');
  224. $query->addMetaData('language', $language);
  225. $offset = max($chunk - 1, 0) * xmlsitemap_get_chunk_size();
  226. $limit = xmlsitemap_get_chunk_size();
  227. $query->range($offset, $limit);
  228. $links = $query->execute();
  229. // Add the XML header and XSL if desired.
  230. xmlsitemap_generate_chunk_header('urlset', $handle, $status, $language);
  231. while ($link = $links->fetchAssoc()) {
  232. $url_options['language'] = ($link['language'] != LANGUAGE_NONE ? xmlsitemap_language_load($link['language']) : $language);
  233. $link['alias'] = xmlsitemap_get_path_alias($link['loc'], $url_options['language']->language);
  234. $link_url = url($link['alias'], $url_options);
  235. // Skip this link if it was a duplicate of the last one.
  236. // @todo Figure out a way to do this before generation so we can report
  237. // back to the user about this.
  238. if ($link_url == $last_url) {
  239. continue;
  240. }
  241. else {
  242. $last_url = $link_url;
  243. }
  244. $link_output = '<url><loc>' . $link_url . '</loc>';
  245. if ($link['lastmod']) {
  246. $link_output .= '<lastmod>' . gmdate(DATE_W3C, $link['lastmod']) . '</lastmod>';
  247. // If the link has a lastmod value, update the changefreq so that links
  248. // with a short changefreq but updated two years ago show decay.
  249. // We use abs() here just incase items were created on this same cron run
  250. // because lastmod would be greater than REQUEST_TIME.
  251. $link['changefreq'] = (abs(REQUEST_TIME - $link['lastmod']) + $link['changefreq']) / 2;
  252. }
  253. if ($link['changefreq']) {
  254. $link_output .= '<changefreq>' . xmlsitemap_get_changefreq($link['changefreq']) . '</changefreq>';
  255. }
  256. if (isset($link['priority']) && $link['priority'] != 0.5) {
  257. // Don't output the priority value for links that have 0.5 priority. This
  258. // is the default 'assumed' value if priority is not included as per the
  259. // sitemaps.org specification.
  260. $link_output .= '<priority>' . number_format($link['priority'], 1) . '</priority>';
  261. }
  262. $link_output .= '</url>' . PHP_EOL;
  263. $status &= (bool) fwrite($handle, $link_output);
  264. }
  265. // Close the XML file.
  266. $status &= (bool) fwrite($handle, '</urlset>' . PHP_EOL);
  267. return $status;
  268. }
  269. /**
  270. * Generate the index sitemap.
  271. *
  272. * @param $handle
  273. * A file system pointer resource that is typically created using fopen().
  274. * @param $status
  275. * @param $language
  276. * A language object, defaults to the default language.
  277. */
  278. function xmlsitemap_generate_index($handle, &$status, $language) {
  279. $url_options = xmlsitemap_get_url_options(array('language' => $language, 'alias' => TRUE));
  280. $chunk_count = xmlsitemap_get_chunk_count(TRUE);
  281. // Add the XML header and XSL if desired.
  282. xmlsitemap_generate_chunk_header('sitemapindex', $handle, $status, $language);
  283. for ($i = 1; $i <= $chunk_count; $i++) {
  284. $output = '<sitemap>';
  285. $output .= '<loc>' . url('sitemap-' . $i . '.xml', $url_options) . '</loc>';
  286. // @todo Use the actual lastmod value of the chunk file.
  287. $output .= '<lastmod>' . gmdate(DATE_W3C, REQUEST_TIME) . '</lastmod>';
  288. $output .= '</sitemap>' . PHP_EOL;
  289. $status &= (bool) fwrite($handle, $output);
  290. }
  291. // Close the XML file.
  292. $status &= (bool) fwrite($handle, '</sitemapindex>' . PHP_EOL);
  293. return $status;
  294. }
  295. /**
  296. * Batch information callback.
  297. */
  298. function xmlsitemap_rebuild_batch($modules = array(), $save_custom = FALSE) {
  299. $batch = array(
  300. 'operations' => array(),
  301. 'finished' => 'xmlsitemap_rebuild_batch_finished',
  302. 'title' => t('Rebuilding Sitemap'),
  303. 'file' => drupal_get_path('module', 'xmlsitemap') . '/xmlsitemap.inc',
  304. );
  305. // Purge any links first.
  306. $batch['operations'][] = array('xmlsitemap_rebuild_batch_clear', array($modules, $save_custom));
  307. // Fetch all the sitemap links and save them to the {xmlsitemap} table.
  308. foreach ($modules as $module) {
  309. if (module_hook($module, 'xmlsitemap_links')) {
  310. $batch['operations'][] = array('xmlsitemap_rebuild_batch_fetch', array($module));
  311. }
  312. }
  313. // Generate all the sitemap pages.
  314. $batch['operations'][] = array('_xmlsitemap_regenerate_before', array());
  315. foreach (xmlsitemap_var('languages') as $language) {
  316. $batch['operations'][] = array('xmlsitemap_rebuild_batch_generate', array(xmlsitemap_language_load($language)));
  317. }
  318. $batch['operations'][] = array('_xmlsitemap_regenerate_after', array());
  319. return $batch;
  320. }
  321. /**
  322. * Batch callback; clear sitemap links for modules.
  323. */
  324. function xmlsitemap_rebuild_batch_clear($modules, $save_custom, &$context) {
  325. $purge = array();
  326. foreach ($modules as $module) {
  327. $types = module_invoke($module, 'xmlsitemap_link_info');
  328. foreach ($types as $type => $info) {
  329. if ($info['purge']) {
  330. $purge[] = $type;
  331. }
  332. }
  333. }
  334. if ($purge) {
  335. $query = db_delete('xmlsitemap');
  336. $query->condition('type', $purge);
  337. // If we want to save the custom data, make sure to exclude any links
  338. // that are not using default inclusion or priority.
  339. if ($save_custom) {
  340. $query->condition('status_override', 0);
  341. $query->condition('priority_override', 0);
  342. }
  343. $query->execute();
  344. }
  345. $context['message'] = t('Purging links.');
  346. }
  347. /**
  348. * Batch callback; fetch and add the sitemap links for a specific module.
  349. */
  350. function xmlsitemap_rebuild_batch_fetch($module, &$context) {
  351. if (!isset($context['sandbox']['progress'])) {
  352. $context['sandbox']['batch'] = module_hook($module, 'xmlsitemap_links_batch_info');
  353. if ($context['sandbox']['batch']) {
  354. $context['sandbox'] += module_invoke($module, 'xmlsitemap_links_batch_info');
  355. }
  356. else {
  357. $context['sandbox']['links'] = module_invoke($module, 'xmlsitemap_links');
  358. $context['sandbox']['max'] = count($context['sandbox']['links']);
  359. }
  360. $context['sandbox'] += array(
  361. 'progress' => 0,
  362. 'current' => 0,
  363. );
  364. }
  365. if ($context['sandbox']['batch']) {
  366. $links = module_invoke($module, 'xmlsitemap_links', $context['sandbox']['current'], xmlsitemap_var('batch_limit'));
  367. }
  368. else {
  369. $links = array_splice($context['sandbox']['links'], 0, xmlsitemap_var('batch_limit'));
  370. }
  371. foreach ($links as $link) {
  372. xmlsitemap_save_link($link);
  373. $context['sandbox']['progress']++;
  374. $context['sandbox']['current'] = $link['id'];
  375. $context['message'] = t('Now processing %module link @count.', array('%module' => $module, '@count' => $context['sandbox']['progress']));
  376. }
  377. if ($context['sandbox']['progress'] != $context['sandbox']['max']) {
  378. $context['finished'] = $context['sandbox']['progress'] / $context['sandbox']['max'];
  379. }
  380. }
  381. /**
  382. * Batch callback; generate the sitemap chunks for a language.
  383. */
  384. function xmlsitemap_rebuild_batch_generate($language, &$context) {
  385. if (!isset($context['sandbox']['progress'])) {
  386. $context['sandbox']['progress'] = 0;
  387. $context['sandbox']['max'] = xmlsitemap_get_chunk_count(TRUE);
  388. // If we have more than one chunk, we need to increment this value by one
  389. // since the index page (chunk 0) will also need to be generated.
  390. if ($context['sandbox']['max'] > 1) {
  391. $context['sandbox']['max']++;
  392. }
  393. }
  394. xmlsitemap_generate($context['sandbox']['progress'], $language);
  395. $context['sandbox']['progress']++;
  396. $context['message'] = t('Now generating @language sitemap page @chunk.', array('@language' => $language->name, '@chunk' => $context['sandbox']['progress']));
  397. if ($context['sandbox']['progress'] != $context['sandbox']['max']) {
  398. $context['finished'] = $context['sandbox']['progress'] / $context['sandbox']['max'];
  399. }
  400. }
  401. /**
  402. * Batch callback; sitemap rebuild finished.
  403. */
  404. function xmlsitemap_rebuild_batch_finished($success, $results, $operations) {
  405. if ($success) {
  406. // Reset the rebuild flag since it was successful.
  407. variable_set('xmlsitemap_rebuild_needed', FALSE);
  408. drupal_set_message(t('The sitemap was rebuilt.'));
  409. }
  410. else {
  411. drupal_set_message(t('The sitemap was not successfully rebuilt.'), 'error');
  412. }
  413. }
  414. /**
  415. * Fetch a short blurb string about module maintainership and sponsors.
  416. *
  417. * This message will be FALSE in 'official' releases.
  418. */
  419. function _xmlsitemap_get_blurb($check_version = TRUE) {
  420. static $blurb;
  421. if (!isset($blurb)) {
  422. $blurb = FALSE;
  423. if (!$check_version || (($version = _xmlsitemap_get_version()) && preg_match('/dev|unstable|alpha|beta|HEAD/i', $version))) {
  424. $sponsors = array(
  425. l('Symantec', 'http://www.symantec.com/'),
  426. l('WebWise Solutions', 'http://www.webwiseone.com/'),
  427. l('Volacci', 'http://www.volacci.com/'),
  428. l('lanetro', 'http://www.lanetro.com/'),
  429. l('Coupons Dealuxe', 'http://couponsdealuxe.com/'),
  430. );
  431. // Don't extract the following string for translation.
  432. $blurb = '<div class="description"><p>Thank you for helping test the XML sitemap module rewrite. Please consider helping offset developer free time by <a href="http://davereid.chipin.com/">donating</a> or if your company is interested in sponsoring the rewrite or a specific feature, please <a href="http://davereid.net/contact">contact the developer</a>. Thank you to the following current sponsors: ' . implode(', ', $sponsors) . ', and all the indivduals that have donated. This message will not be seen in the stable versions.</p></div>';
  433. //http://drupalmodules.com/module/xml-sitemap
  434. }
  435. }
  436. return $blurb;
  437. }
  438. function _xmlsitemap_get_version() {
  439. static $version;
  440. if (!isset($version)) {
  441. $modules = _system_rebuild_module_data();
  442. $version = $modules['xmlsitemap']->info['version'];
  443. }
  444. return $version;
  445. }