<?php

/**
 * @file
 * Sitemap generation and rebuilding functions for the xmlsitemap module.
 *
 * @ingroup xmlsitemap
 */
/**
 * Resolve the URL alias for an internal Drupal path.
 *
 * This is similar to drupal_get_path_alias(), but it fetches all aliases of a
 * language at once and keeps them in a static cache, so that only one database
 * query per language is executed instead of several or possibly thousands
 * during sitemap generation.
 *
 * @param string $path
 *   An internal Drupal path.
 * @param string $language
 *   A language code to use when looking up the paths.
 *
 * @return string
 *   The alias for the path (or the path itself when no alias is stored) after
 *   it has been passed through hook_url_outbound_alter().
 */
function xmlsitemap_get_path_alias($path, $language) {
  static $aliases;
  static $last_language;

  $alias_sql = "SELECT source, alias FROM {url_alias} WHERE language = :language ORDER BY pid";

  // The language-neutral aliases are loaded once and kept for later calls.
  if (!isset($aliases)) {
    $aliases[LANGUAGE_NONE] = db_query($alias_sql, array(':language' => LANGUAGE_NONE))->fetchAllKeyed();
  }

  // Only one language-specific alias map is held at a time: switching to a
  // different language drops the previous map before loading the new one.
  if ($language != LANGUAGE_NONE && $last_language != $language) {
    unset($aliases[$last_language]);
    $aliases[$language] = db_query($alias_sql, array(':language' => $language))->fetchAllKeyed();
    $last_language = $language;
  }

  // A language-specific alias wins over a language-neutral one.
  $alias = NULL;
  if ($language != LANGUAGE_NONE && isset($aliases[$language][$path])) {
    $alias = $aliases[$language][$path];
  }
  elseif (isset($aliases[LANGUAGE_NONE][$path])) {
    $alias = $aliases[LANGUAGE_NONE][$path];
  }
  $normalized_path = isset($alias) ? $alias : $path;

  // hook_url_outbound_alter() expects defaults in url() options. The 'alias'
  // flag tells the hook implementations whether an alias was already found.
  $options = array(
    'fragment' => '',
    'query' => array(),
    'absolute' => FALSE,
    'alias' => isset($alias),
    'prefix' => '',
    'external' => FALSE,
  );

  // We need to pass our path through hook_url_outbound_alter(). This fixes
  // clean URLs not working when they don't exist in the {url_alias} table and
  // are created with something like subpathauto.
  $original_path = $normalized_path;
  drupal_alter('url_outbound', $normalized_path, $options, $original_path);

  return $normalized_path;
}
/**
 * Prepare the environment before the sitemap is rebuilt.
 *
 * Attempts to raise the PHP memory limit and, when the developer mode variable
 * is enabled, logs a debug message with the current peak memory usage.
 */
function _xmlsitemap_regenerate_before() {
  // Attempt to increase the memory limit.
  _xmlsitemap_set_memory_limit();

  // The start-of-run log entry is only written in developer mode.
  if (!variable_get('xmlsitemap_developer_mode', 0)) {
    return;
  }

  $message_args = array(
    '@memory-peak' => format_size(memory_get_peak_usage(TRUE)),
  );
  watchdog('xmlsitemap', 'Starting XML sitemap generation. Memory usage: @memory-peak.', $message_args, WATCHDOG_DEBUG);
}
/**
 * Report how far peak memory usage has grown since a baseline was recorded.
 *
 * @param bool $start
 *   TRUE to reset the baseline to the current peak usage. The baseline is also
 *   recorded on the first call.
 *
 * @return int
 *   The difference in bytes between the current value of
 *   memory_get_peak_usage(TRUE) and the recorded baseline.
 */
function _xmlsitemap_get_memory_usage($start = FALSE) {
  static $memory_start;

  $peak = memory_get_peak_usage(TRUE);
  if ($start || !isset($memory_start)) {
    $memory_start = $peak;
  }

  return $peak - $memory_start;
}
/**
 * Estimate the PHP memory limit needed for sitemap generation.
 *
 * This function just makes a guess. It does not take into account the
 * currently loaded modules. The result is cached with drupal_static().
 *
 * @return int
 *   The estimated memory limit in bytes.
 */
function _xmlsitemap_get_optimal_memory_limit() {
  $optimal_limit = &drupal_static(__FUNCTION__);
  if (isset($optimal_limit)) {
    return $optimal_limit;
  }

  // Start from the minimum memory amount required by Drupal core.
  $optimal_limit = parse_size(DRUPAL_MINIMUM_PHP_MEMORY_LIMIT);

  // Add 500 bytes for every link that fits into one sitemap chunk.
  $optimal_limit += xmlsitemap_get_chunk_size() * 500;

  // Add 250 bytes per stored URL alias when aliases are prefetched.
  if (variable_get('xmlsitemap_prefetch_aliases', 1)) {
    $alias_count = db_query("SELECT COUNT(pid) FROM {url_alias}")->fetchField();
    $optimal_limit += $alias_count * 250;
  }

  return $optimal_limit;
}
/**
 * Raise the PHP memory limit for sitemap generation when it is too low.
 *
 * The limit is only ever increased. Nothing is changed when the current limit
 * cannot be read or when PHP runs without a limit (-1).
 *
 * @param int|null $new_limit
 *   An optional PHP memory limit in bytes. If not provided, the value of
 *   _xmlsitemap_get_optimal_memory_limit() will be used.
 *
 * @return string|false|null
 *   The return value of ini_set() (the previous setting, or FALSE on failure)
 *   when a higher limit was requested, or NULL when no change was attempted.
 */
function _xmlsitemap_set_memory_limit($new_limit = NULL) {
  $current_limit = @ini_get('memory_limit');
  if (!$current_limit || $current_limit == -1) {
    return NULL;
  }

  // Fall back to the estimate only when the caller did not supply a limit.
  // (The check used to be inverted, which discarded caller-supplied values
  // and left the default NULL in place so the limit was never raised.)
  if (is_null($new_limit)) {
    $new_limit = _xmlsitemap_get_optimal_memory_limit();
  }

  if (parse_size($current_limit) < $new_limit) {
    // Best effort: ini_set() may be disabled, so warnings are suppressed.
    return @ini_set('memory_limit', $new_limit);
  }

  return NULL;
}
/**
 * Generate one page (chunk) of the sitemap.
 *
 * @param object $sitemap
 *   The XML sitemap object to generate a page for.
 * @param int $page
 *   The number of the specific page of the sitemap to generate.
 *
 * @return int
 *   The element count reported by the writer after the page was generated.
 *
 * @throws Exception
 *   Any exception raised while writing is logged to watchdog and re-thrown.
 */
function xmlsitemap_generate_page(stdClass $sitemap, $page) {
  try {
    $writer = new XMLSitemapWriter($sitemap, $page);
    $writer->startDocument();
    $writer->generateXML();
    $writer->endDocument();
  }
  catch (Exception $exception) {
    // Record the failure, then let the caller deal with it.
    watchdog_exception('xmlsitemap', $exception);
    throw $exception;
  }

  return $writer->getSitemapElementCount();
}
/**
 * Write the links that belong to one chunk of a sitemap.
 *
 * Loads the links with access and status set to 1 for the requested chunk from
 * the {xmlsitemap} table, builds the URL of each link and hands the resulting
 * element to the writer. A link whose URL equals the URL of the link written
 * immediately before it is skipped.
 *
 * @param object $sitemap
 *   The sitemap being generated. Its uri['options'] are used as the base
 *   url() options.
 * @param XMLSitemapWriter $writer
 *   The writer that receives each 'url' element.
 * @param int $chunk
 *   The chunk (page) number. Chunk 1 starts at the first link; values below 1
 *   also start at the first link.
 *
 * @return int
 *   The number of links written.
 */
function xmlsitemap_generate_chunk(stdClass $sitemap, XMLSitemapWriter $writer, $chunk) {
  global $base_url;

  $default_elements = array(
    'lastmod',
    'changefreq',
    'priority',
  );
  $output_elements = drupal_map_assoc(variable_get('xmlsitemap_output_elements', $default_elements));
  $lastmod_format = variable_get('xmlsitemap_lastmod_format', XMLSITEMAP_LASTMOD_MEDIUM);

  // Options stored on the sitemap take precedence over these defaults.
  $url_options = $sitemap->uri['options'] + array(
    'absolute' => TRUE,
    'base_url' => variable_get('xmlsitemap_base_url', $base_url),
    'language' => language_default(),
    'alias' => variable_get('xmlsitemap_prefetch_aliases', TRUE),
  );

  $columns = array(
    'id',
    'type',
    'subtype',
    'loc',
    'lastmod',
    'changefreq',
    'changecount',
    'priority',
    'language',
    'access',
    'status',
  );
  $query = db_select('xmlsitemap', 'x');
  $query->fields('x', $columns);
  $query->condition('x.access', 1);
  $query->condition('x.status', 1);
  $query->orderBy('x.language', 'DESC');
  $query->orderBy('x.loc');
  $query->addTag('xmlsitemap_generate');
  $query->addMetaData('sitemap', $sitemap);

  // Select only the slice of links that belongs to the requested chunk.
  $chunk_size = xmlsitemap_get_chunk_size();
  $query->range(max($chunk - 1, 0) * $chunk_size, $chunk_size);

  $previous_url = '';
  $written = 0;
  $rows = $query->execute();

  while ($link = $rows->fetchAssoc()) {
    // Swap the language code for a language object; language-neutral links
    // use the language from the URL options.
    if ($link['language'] != LANGUAGE_NONE) {
      $link['language'] = xmlsitemap_language_load($link['language']);
    }
    else {
      $link['language'] = $url_options['language'];
    }

    // Remove query or fragment so that only the path itself gets aliased.
    $parsed_url = drupal_parse_url($link['loc']);
    $link['loc'] = $parsed_url['path'];
    if ($url_options['alias']) {
      $link['loc'] = xmlsitemap_get_path_alias($link['loc'], $link['language']->language);
    }

    $link_options = array(
      'language' => $link['language'],
      'xmlsitemap_link' => $link,
      'xmlsitemap_sitemap' => $sitemap,
      'query' => $parsed_url['query'],
      'fragment' => $parsed_url['fragment'],
    );
    // @todo Add a separate hook_xmlsitemap_link_url_alter() here?
    $link_url = url($link['loc'], $link_options + $url_options);

    // Skip this link if it was a duplicate of the last one.
    // @todo Figure out a way to do this before generation so we can report
    // back to the user about this.
    if ($link_url == $previous_url) {
      continue;
    }
    $previous_url = $link_url;
    // Keep track of the total number of links written.
    $written++;

    $element = array('loc' => $link_url);

    if ($link['lastmod']) {
      if (!empty($output_elements['lastmod'])) {
        $element['lastmod'] = gmdate($lastmod_format, $link['lastmod']);
      }
      // Average the age of the link with its stored changefreq so that links
      // with a short changefreq but updated two years ago show decay. abs()
      // is used in case items were created on this same cron run, because
      // lastmod would then be greater than REQUEST_TIME.
      $age = abs(REQUEST_TIME - $link['lastmod']);
      $link['changefreq'] = ($age + $link['changefreq']) / 2;
    }

    if (!empty($output_elements['changefreq']) && $link['changefreq']) {
      $element['changefreq'] = xmlsitemap_get_changefreq($link['changefreq']);
    }

    // Don't output the priority value for links that have 0.5 priority. This
    // is the default 'assumed' value if priority is not included as per the
    // sitemaps.org specification.
    if (!empty($output_elements['priority']) && isset($link['priority']) && $link['priority'] != 0.5) {
      $element['priority'] = number_format($link['priority'], 1);
    }

    // @todo Should this be moved to XMLSitemapWriter::writeSitemapElement()?
    drupal_alter('xmlsitemap_element', $element, $link, $sitemap);
    $writer->writeSitemapElement('url', $element);
  }

  return $written;
}
/**
 * Generate the index sitemap.
 *
 * @param object $sitemap
 *   The XML sitemap object to generate the index for.
 *
 * @return int
 *   The element count reported by the index writer.
 *
 * @throws Exception
 *   Any exception raised while writing is logged to watchdog and re-thrown.
 */
function xmlsitemap_generate_index(stdClass $sitemap) {
  try {
    $writer = new XMLSitemapIndexWriter($sitemap);
    $writer->startDocument();
    $writer->generateXML();
    $writer->endDocument();
  }
  catch (Exception $exception) {
    // Record the failure, then let the caller deal with it.
    watchdog_exception('xmlsitemap', $exception);
    throw $exception;
  }

  return $writer->getSitemapElementCount();
}
// BATCH OPERATIONS -----------------------------------------------------------.

/**
 * Batch information callback for regenerating the sitemap files.
 *
 * @param array $smids
 *   An optional array of XML sitemap IDs. If not provided, it will load all
 *   existing XML sitemaps.
 *
 * @return array
 *   A batch definition with 'operations', 'finished', 'title' and 'file' keys.
 */
function xmlsitemap_regenerate_batch(array $smids = array()) {
  if (empty($smids)) {
    $smids = db_query("SELECT smid FROM {xmlsitemap_sitemap}")->fetchCol();
  }

  $operations = array();

  // Set the regenerate flag in case something fails during file generation.
  $operations[] = array('xmlsitemap_batch_variable_set', array(array('xmlsitemap_regenerate_needed' => TRUE)));

  // @todo Get rid of this batch operation.
  $operations[] = array('_xmlsitemap_regenerate_before', array());

  // Generate all the sitemap pages, followed by the index, for each sitemap.
  foreach ($smids as $smid) {
    $operations[] = array('xmlsitemap_regenerate_batch_generate', array($smid));
    $operations[] = array('xmlsitemap_regenerate_batch_generate_index', array($smid));
  }

  // Clear the regeneration flag.
  $operations[] = array('xmlsitemap_batch_variable_set', array(array('xmlsitemap_regenerate_needed' => FALSE)));

  return array(
    'operations' => $operations,
    'finished' => 'xmlsitemap_regenerate_batch_finished',
    'title' => t('Regenerating Sitemap'),
    'file' => drupal_get_path('module', 'xmlsitemap') . '/xmlsitemap.generate.inc',
  );
}
/**
 * Batch callback; generate all pages of a sitemap.
 *
 * Each invocation generates one page. The operation keeps running until a page
 * comes back without links, at which point the totals are saved.
 *
 * @param int $smid
 *   The ID of the sitemap to generate.
 * @param array $context
 *   The batch context; 'sandbox' holds the sitemap and the expected maximum.
 */
function xmlsitemap_regenerate_batch_generate($smid, array &$context) {
  if (!isset($context['sandbox']['sitemap'])) {
    // First pass: load the sitemap and reset its counters.
    $context['sandbox']['sitemap'] = xmlsitemap_sitemap_load($smid);
    $context['sandbox']['sitemap']->chunks = 1;
    $context['sandbox']['sitemap']->links = 0;
    $context['sandbox']['max'] = XMLSITEMAP_MAX_SITEMAP_LINKS;

    // Clear the cache directory for this sitemap before generating any files.
    xmlsitemap_check_directory($context['sandbox']['sitemap']);
    xmlsitemap_clear_directory($context['sandbox']['sitemap']);
  }

  $sitemap = &$context['sandbox']['sitemap'];
  $links = xmlsitemap_generate_page($sitemap, $sitemap->chunks);

  $page_options = $sitemap->uri['options'] + array('query' => array('page' => $sitemap->chunks));
  $context['message'] = t('Now generating %sitemap-url.', array('%sitemap-url' => url('sitemap.xml', $page_options)));

  if (!$links) {
    // Cleanup the 'extra' empty file.
    $file = xmlsitemap_sitemap_get_file($sitemap, $sitemap->chunks);
    if (file_exists($file) && $sitemap->chunks > 1) {
      file_unmanaged_delete($file);
    }
    $sitemap->chunks--;

    // Save the updated chunks and links values.
    $context['sandbox']['max'] = $sitemap->chunks;
    $sitemap->updated = REQUEST_TIME;
    xmlsitemap_sitemap_get_max_filesize($sitemap);
    xmlsitemap_sitemap_save($sitemap);
  }
  else {
    // The page had content, so count it and move on to the next one.
    $sitemap->links += $links;
    $sitemap->chunks++;
  }

  // 'finished' is only overridden while more pages remain to be generated.
  if ($sitemap->chunks != $context['sandbox']['max']) {
    $context['finished'] = $sitemap->chunks / $context['sandbox']['max'];
  }
}
/**
 * Batch callback; generate the index page of a sitemap.
 *
 * The index is only generated for sitemaps with more than one chunk.
 *
 * @param int $smid
 *   The ID of the sitemap to generate the index for.
 * @param array $context
 *   The batch context; its 'message' is set when an index is generated.
 */
function xmlsitemap_regenerate_batch_generate_index($smid, array &$context) {
  $sitemap = xmlsitemap_sitemap_load($smid);

  // Nothing to do unless the sitemap has more than one chunk.
  if (!($sitemap->chunks > 1)) {
    return;
  }

  xmlsitemap_generate_index($sitemap);
  $index_url = url('sitemap.xml', $sitemap->uri['options']);
  $context['message'] = t('Now generating sitemap index %sitemap-url.', array('%sitemap-url' => $index_url));
}
/**
 * Batch callback; sitemap regeneration finished.
 *
 * @param bool $success
 *   Whether the batch completed without errors.
 * @param array $results
 *   The batch results (unused).
 * @param array $operations
 *   The operations that remained unprocessed (unused).
 * @param string $elapsed
 *   The elapsed processing time, used in the log message.
 */
function xmlsitemap_regenerate_batch_finished($success, $results, $operations, $elapsed) {
  // The run only counts as successful when the last operation was able to
  // clear the 'regenerate needed' flag again.
  $regenerated = $success && !variable_get('xmlsitemap_regenerate_needed', FALSE);

  if (!$regenerated) {
    drupal_set_message(t('The sitemaps were not successfully regenerated.'), 'error');
    return;
  }

  variable_set('xmlsitemap_generated_last', REQUEST_TIME);

  // Show a watchdog message that the sitemap was regenerated.
  $message_args = array(
    '@elapsed' => $elapsed,
    '@memory-peak' => format_size(memory_get_peak_usage(TRUE)),
  );
  watchdog('xmlsitemap', 'Finished XML sitemap generation in @elapsed. Memory usage: @memory-peak.', $message_args, WATCHDOG_NOTICE);

  module_invoke_all('xmlsitemap_regenerate_finished');
}
/**
 * Batch information callback for rebuilding the sitemap data.
 *
 * The operations of the regeneration batch are appended, so the sitemap files
 * are regenerated after the links were rebuilt. Only the operations are
 * merged; the 'finished' callback stays xmlsitemap_rebuild_batch_finished.
 *
 * @param array $entities
 *   The link types to rebuild. Each one is passed to
 *   xmlsitemap_get_link_info() to find its rebuild callback.
 * @param bool $save_custom
 *   Passed on (cast to bool) to the operation that purges existing links.
 *
 * @return array
 *   A batch definition with 'operations', 'finished', 'title' and 'file' keys.
 */
function xmlsitemap_rebuild_batch(array $entities, $save_custom = FALSE) {
  $operations = array();

  // Set the rebuild flag in case something fails during the rebuild.
  $operations[] = array('xmlsitemap_batch_variable_set', array(array('xmlsitemap_rebuild_needed' => TRUE)));

  // Purge any links first.
  $operations[] = array('xmlsitemap_rebuild_batch_clear', array($entities, (bool) $save_custom));

  // Fetch all the sitemap links and save them to the {xmlsitemap} table.
  foreach ($entities as $entity) {
    $info = xmlsitemap_get_link_info($entity);
    $operations[] = array($info['xmlsitemap']['rebuild callback'], array($entity));
  }

  // Clear the rebuild flag.
  $operations[] = array('xmlsitemap_batch_variable_set', array(array('xmlsitemap_rebuild_needed' => FALSE)));

  // Add the regeneration batch.
  $regenerate_batch = xmlsitemap_regenerate_batch();

  return array(
    'operations' => array_merge($operations, $regenerate_batch['operations']),
    'finished' => 'xmlsitemap_rebuild_batch_finished',
    'title' => t('Rebuilding Sitemap'),
    'file' => drupal_get_path('module', 'xmlsitemap') . '/xmlsitemap.generate.inc',
  );
}
/**
 * Batch callback; set an array of variables and their values.
 *
 * @param array $variables
 *   An array of values to store with variable_set(), keyed by variable name.
 */
function xmlsitemap_batch_variable_set(array $variables) {
  foreach (array_keys($variables) as $name) {
    variable_set($name, $variables[$name]);
  }
}
/**
 * Batch callback; clear sitemap links for entities.
 *
 * @param array $entities
 *   The link types whose links should be removed.
 * @param bool $save_custom
 *   Passed through to xmlsitemap_rebuild_clear().
 * @param array $context
 *   The batch context; its 'message' is updated.
 */
function xmlsitemap_rebuild_batch_clear(array $entities, $save_custom, &$context) {
  // Only call the clear function when there is at least one type to clear.
  if (count($entities) > 0) {
    xmlsitemap_rebuild_clear($entities, $save_custom);
  }

  $context['message'] = t('Purging links.');
}
/**
 * Batch callback; fetch and add the sitemap links for a specific entity.
 *
 * Each invocation processes up to 20 entities whose ID is greater than the
 * last processed ID and hands their IDs to the link type's process callback.
 *
 * @param string $entity
 *   The entity type to process.
 * @param array $context
 *   The batch context. The sandbox keeps the link info, the number of
 *   processed entities, the last processed ID and the expected total.
 */
function xmlsitemap_rebuild_batch_fetch($entity, &$context) {
  if (!isset($context['sandbox']['info'])) {
    $context['sandbox']['info'] = xmlsitemap_get_link_info($entity);
    $context['sandbox']['progress'] = 0;
    $context['sandbox']['last_id'] = 0;
  }
  $info = $context['sandbox']['info'];

  $query = new EntityFieldQuery();
  $query->entityCondition('entity_type', $entity);
  $query->entityCondition('entity_id', $context['sandbox']['last_id'], '>');
  $query->addTag('xmlsitemap_link_bundle_access');
  $query->addTag('xmlsitemap_rebuild');
  $query->addMetaData('entity', $entity);
  $query->addMetaData('entity_info', $info);

  $types = xmlsitemap_get_link_type_enabled_bundles($entity);
  if (!$types) {
    // If no enabled bundle types, skip everything else.
    return;
  }
  $query->entityCondition('bundle', $types, 'IN');

  if (!isset($context['sandbox']['max'])) {
    $count_query = clone $query;
    $count_query->count();
    $context['sandbox']['max'] = $count_query->execute();
    if (!$context['sandbox']['max']) {
      // If there are no items to process, skip everything else.
      return;
    }
  }

  // PostgreSQL cannot have the ORDER BY in the count query, so the ordering
  // is only added after the count query has been cloned.
  $query->entityOrderBy('entity_id');
  $limit = 20;
  $query->range(0, $limit);
  $result = $query->execute();

  // The total was counted on the first pass, so fewer entities than expected
  // may be left. Without any rows the progress could never reach the total
  // and the operation would be repeated, so finish it here.
  if (empty($result[$entity])) {
    $context['finished'] = 1;
    return;
  }

  $ids = array_keys($result[$entity]);
  $info['xmlsitemap']['process callback']($ids);

  $context['sandbox']['last_id'] = end($ids);
  $context['sandbox']['progress'] += count($ids);
  $context['message'] = t('Now processing %entity @last_id (@progress of @count).', array(
    '%entity' => $entity,
    '@last_id' => $context['sandbox']['last_id'],
    '@progress' => $context['sandbox']['progress'],
    '@count' => $context['sandbox']['max'],
  ));

  if ($context['sandbox']['progress'] >= $context['sandbox']['max']) {
    $context['finished'] = 1;
  }
  else {
    $context['finished'] = $context['sandbox']['progress'] / $context['sandbox']['max'];
  }
}
/**
 * Batch callback; sitemap rebuild finished.
 *
 * @param bool $success
 *   Whether the batch completed without errors.
 * @param array $results
 *   The batch results (unused).
 * @param array $operations
 *   The operations that remained unprocessed (unused).
 * @param string $elapsed
 *   The elapsed processing time (unused).
 */
function xmlsitemap_rebuild_batch_finished($success, $results, $operations, $elapsed) {
  // The rebuild only counts as successful when the 'rebuild needed' flag was
  // cleared again by the batch.
  $rebuilt = $success && !variable_get('xmlsitemap_rebuild_needed', FALSE);

  if ($rebuilt) {
    drupal_set_message(t('The sitemap links were rebuilt.'));
    return;
  }

  drupal_set_message(t('The sitemap links were not successfully rebuilt.'), 'error');
}
/**
 * Get the link types that can be rebuilt.
 *
 * A link type qualifies when it declares a rebuild callback and, if it has a
 * bundle entity key, at least one enabled bundle.
 *
 * @return array
 *   A list of link type names.
 */
function xmlsitemap_get_rebuildable_link_types() {
  $rebuild_types = array();

  foreach (xmlsitemap_get_link_info() as $entity => $info) {
    $has_callback = !empty($info['xmlsitemap']['rebuild callback']);
    $has_bundles = !empty($info['entity keys']['bundle']);

    // Types with bundles but without any enabled bundle are left out, since
    // rebuilding them wouldn't get any links.
    if ($has_callback && (!$has_bundles || xmlsitemap_get_link_type_enabled_bundles($entity))) {
      $rebuild_types[] = $entity;
    }
  }

  return $rebuild_types;
}
/**
 * Clear all sitemap links for given entity types.
 *
 * @param array $types
 *   An array of link types.
 * @param bool $save_custom
 *   A boolean if links with status or priority overridden should not be
 *   removed (and hence overridden values not lost).
 *
 * @return int
 *   The number of deleted links; 0 when no link types were given.
 */
function xmlsitemap_rebuild_clear(array $types, $save_custom) {
  // Let other modules respond to the rebuild clearing.
  module_invoke_all('xmlsitemap_rebuild_clear', $types, $save_custom);

  // Without any type there is nothing to delete, and an empty array would be
  // turned into an empty IN () list by the condition below.
  if (empty($types)) {
    return 0;
  }

  $query = db_delete('xmlsitemap');
  $query->condition('type', $types, 'IN');

  // If we want to save the custom data, make sure to exclude any links
  // that are not using default inclusion or priority.
  if ($save_custom) {
    $query->condition('status_override', 0);
    $query->condition('priority_override', 0);
  }

  return $query->execute();
}
|