<?php

/**
 * @file
 * Converts non-latin text to US-ASCII and sanitizes file names.
 *
 * @author Stefan M. Kudwien (http://drupal.org/user/48898)
 */

/**
 * Implements hook_menu().
 *
 * Registers two local tasks below admin/config/media/file-system: a default
 * "Settings" task and a "Transliteration" task that renders the
 * transliteration_retroactive form from transliteration.admin.inc.
 *
 * @return
 *   An array of menu items keyed by path.
 */
function transliteration_menu() {
  $items = array();

  $items['admin/config/media/file-system/settings'] = array(
    'title' => 'Settings',
    'file path' => drupal_get_path('module', 'system'),
    'weight' => -10,
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );
  $items['admin/config/media/file-system/transliteration'] = array(
    'title' => 'Transliteration',
    'description' => 'Convert existing file names to US-ASCII.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('transliteration_retroactive'),
    'access arguments' => array('administer site configuration'),
    'file' => 'transliteration.admin.inc',
    'weight' => 10,
    'type' => MENU_LOCAL_TASK,
  );

  return $items;
}

/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Adds transliteration settings to the file system configuration form.
 *
 * @param $form
 *   The form array, altered in place.
 * @param $form_state
 *   The form state; not used here.
 */
function transliteration_form_system_file_system_settings_alter(&$form, &$form_state) {
  $form['transliteration'] = array(
    '#type' => 'item',
    '#title' => t('Transliteration'),
    '#value' => '',
  );
  $form['transliteration']['transliteration_file_uploads'] = array(
    '#type' => 'checkbox',
    '#title' => t('Transliterate file names during upload.'),
    '#description' => t('Enable to convert file names to US-ASCII character set for cross-platform compatibility.'),
    '#default_value' => variable_get('transliteration_file_uploads', TRUE),
  );
  // The two options below only make sense while upload transliteration is
  // enabled, so they are hidden whenever that checkbox is unchecked.
  $form['transliteration']['transliteration_file_uploads_display_name'] = array(
    '#type' => 'checkbox',
    '#title' => t('Transliterate the displayed file name.'),
    '#description' => t('Enable to also convert the file name that is displayed within the site (for example, in link text).'),
    '#default_value' => variable_get('transliteration_file_uploads_display_name', TRUE),
    '#states' => array(
      'invisible' => array(
        'input[name="transliteration_file_uploads"]' => array('checked' => FALSE),
      ),
    ),
  );
  $form['transliteration']['transliteration_file_lowercase'] = array(
    '#type' => 'checkbox',
    '#title' => t('Lowercase transliterated file names.'),
    '#default_value' => variable_get('transliteration_file_lowercase', TRUE),
    '#description' => t('This is a recommended setting to prevent issues with case-insensitive file systems. It has no effect if transliteration has been disabled.'),
    '#states' => array(
      'invisible' => array(
        'input[name="transliteration_file_uploads"]' => array('checked' => FALSE),
      ),
    ),
  );
  $form['buttons']['#weight'] = 1;
}

/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Adds transliteration settings to the search settings form.
 *
 * @param $form
 *   The form array, altered in place.
 * @param $form_state
 *   The form state; not used here.
 */
function transliteration_form_search_admin_settings_alter(&$form, &$form_state) {
  $form['transliteration'] = array(
    '#type' => 'fieldset',
    '#title' => t('Transliteration'),
  );
  $form['transliteration']['transliteration_search'] = array(
    '#type' => 'checkbox',
    '#title' => t('Transliterate search index and searched strings.'),
    '#description' => t('Enable to allow searching and indexing using US-ASCII character set, i.e. to treat accented and unaccented letters the same.'),
    '#default_value' => variable_get('transliteration_search', TRUE),
  );
}

/**
 * Transliterates and sanitizes a file name.
 *
 * The resulting file name has spaces replaced with underscores, consists of
 * only the characters 0-9, A-Z, a-z, underscore, dot and hyphen, has runs of
 * underscores, dots or hyphens collapsed to a single character, and is
 * converted to lowercase (if configured). If multiple files have been
 * submitted as an array, the names will be processed recursively.
 *
 * @param $filename
 *   A file name, or an array of file names.
 * @param $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input and
 *   is used to apply language-specific variations. If the source language is
 *   not known at the time of transliteration, it is recommended to set this
 *   argument to the site default language to produce consistent results.
 *   Otherwise the current display language will be used.
 *
 * @return
 *   Sanitized file name, or array of sanitized file names (array keys are
 *   preserved).
 *
 * @see language_default()
 */
function transliteration_clean_filename($filename, $source_langcode = NULL) {
  if (is_array($filename)) {
    foreach ($filename as $key => $value) {
      $filename[$key] = transliteration_clean_filename($value, $source_langcode);
    }
    return $filename;
  }

  // Allow other modules to alter the filename prior to processing.
  drupal_alter('transliteration_clean_filename_prepare', $filename, $source_langcode);
  $filename = transliteration_get($filename, '', $source_langcode);
  // Replace spaces with underscores. Any other whitespace character is
  // dropped by the unsafe-character filter below.
  $filename = str_replace(' ', '_', $filename);
  // Remove remaining unsafe characters.
  $filename = preg_replace('![^0-9A-Za-z_.-]!', '', $filename);
  // Remove multiple consecutive non-alphabetical characters.
  $filename = preg_replace('/(_)_+|(\.)\.+|(-)-+/', '\\1\\2\\3', $filename);
  // Force lowercase to prevent issues on case-insensitive file systems.
  if (variable_get('transliteration_file_lowercase', TRUE)) {
    $filename = strtolower($filename);
  }
  return $filename;
}

/**
 * Transliterates text.
 *
 * Takes an input string in any language and character set, and tries to
 * represent it in US-ASCII characters by conveying, in Roman letters, the
 * pronunciation expressed by the text in some other writing system.
 *
 * The actual work is delegated to _transliteration_process(), which is
 * loaded on demand from the module's .inc file.
 *
 * @param $text
 *   UTF-8 encoded text input.
 * @param $unknown
 *   Replacement string for characters that do not have a suitable ASCII
 *   equivalent.
 * @param $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input and
 *   is used to apply language-specific variations. If the source language is
 *   not known at the time of transliteration, it is recommended to set this
 *   argument to the site default language to produce consistent results.
 *   Otherwise the current display language will be used.
 *
 * @return
 *   Transliterated text.
 *
 * @see language_default()
 */
function transliteration_get($text, $unknown = '?', $source_langcode = NULL) {
  if (!function_exists('_transliteration_process')) {
    module_load_include('inc', 'transliteration');
  }
  return _transliteration_process($text, $unknown, $source_langcode);
}

/**
 * Implements hook_init().
 *
 * Sanitizes file names during upload. The unaltered names are kept in
 * $_FILES['files']['orig_name'] so that transliteration_file_presave() can
 * restore them as display names.
 */
function transliteration_init() {
  // Both superglobals are request-controlled, so their shape is verified
  // before they are iterated or used as an array offset.
  if (!empty($_FILES['files']['name']) && is_array($_FILES['files']['name']) && variable_get('transliteration_file_uploads', TRUE)) {
    // Figure out language, which is available in $_POST['language'] for node
    // forms.
    $langcode = NULL;
    // A non-string value (e.g. language[]=x) is not a valid array offset and
    // must never reach the isset() lookup below.
    if (!empty($_POST['language']) && is_string($_POST['language'])) {
      $languages = language_list();
      if (isset($languages[$_POST['language']])) {
        $langcode = $_POST['language'];
      }
    }
    foreach ($_FILES['files']['name'] as $field => $filename) {
      // Keep a copy of the unaltered file name.
      $_FILES['files']['orig_name'][$field] = $filename;
      $_FILES['files']['name'][$field] = transliteration_clean_filename($filename, $langcode);
    }
  }
}

/**
 * Implements hook_file_presave().
 *
 * @param $file
 *   The file object about to be saved; its filename property may be replaced
 *   with the original (untransliterated) upload name.
 */
function transliteration_file_presave($file) {
  // If an uploaded file had its name altered in transliteration_init() and if
  // the human-readable display name is not being transliterated, restore the
  // original version as the human-readable name before saving. (The
  // transliterated version will still be used in the file URI, which is the
  // only place where it matters.)
  if (!empty($_FILES['files']['name']) && variable_get('transliteration_file_uploads', TRUE) && !variable_get('transliteration_file_uploads_display_name', TRUE)) {
    // Strict comparison: a loose search treats numeric-looking names such as
    // "1.0" and "1.00" as equal and could restore the wrong original name.
    $key = array_search($file->filename, $_FILES['files']['name'], TRUE);
    if ($key !== FALSE && isset($_FILES['files']['orig_name'][$key])) {
      $file->filename = $_FILES['files']['orig_name'][$key];
    }
  }
}

/**
 * Implements hook_search_preprocess().
 *
 * Transliterates text added to the search index and user submitted search
 * keywords.
 *
 * @param $text
 *   The text to preprocess.
 *
 * @return
 *   The transliterated text (characters without an ASCII equivalent are
 *   removed), or the unchanged text if search transliteration is disabled.
 */
function transliteration_search_preprocess($text) {
  if (variable_get('transliteration_search', TRUE)) {
    return transliteration_get($text, '', language_default('language'));
  }
  return $text;
}

/**
 * Implements hook_filter_info().
 *
 * @return
 *   The definition of the 'transliteration' text filter.
 */
function transliteration_filter_info() {
  return array(
    'transliteration' => array(
      'title' => t('Convert all characters to US-ASCII'),
      'process callback' => '_transliteration_filter_process',
      'settings callback' => '_transliteration_filter_settings',
      'default settings' => array(
        'no_known_transliteration' => '?',
      ),
      'tips callback' => '_transliteration_filter_tips',
    ),
  );
}

/**
 * Process callback for the transliteration filter.
 *
 * @param $text
 *   The text to be filtered.
 * @param $filter
 *   The filter object; its 'no_known_transliteration' setting is used as the
 *   replacement for characters without an ASCII equivalent.
 * @param $format
 *   Not used.
 * @param $langcode
 *   Language code passed through to transliteration_get() as the source
 *   language.
 * @param $cache
 *   Not used.
 * @param $cache_id
 *   Not used.
 *
 * @return
 *   The transliterated text.
 */
function _transliteration_filter_process($text, $filter, $format, $langcode, $cache, $cache_id) {
  return transliteration_get($text, $filter->settings['no_known_transliteration'], $langcode);
}

/**
 * Settings callback for the transliteration filter.
 *
 * @param $form
 *   Not used.
 * @param $form_state
 *   Not used.
 * @param $filter
 *   The filter object; any settings it is missing are filled in from
 *   $defaults.
 * @param $format
 *   Not used.
 * @param $defaults
 *   The default settings for the filter.
 * @param $filters
 *   Not used.
 *
 * @return
 *   An array of form elements for the filter settings.
 */
function _transliteration_filter_settings($form, &$form_state, $filter, $format, $defaults, $filters) {
  $filter->settings += $defaults;

  return array(
    'no_known_transliteration' => array(
      '#type' => 'textfield',
      '#title' => t('Placeholder for characters with no known US-ASCII equivalent'),
      '#size' => 2,
      // The maximum length is 5 in order to accommodate unicode multibyte input.
      '#maxlength' => 5,
      '#default_value' => $filter->settings['no_known_transliteration'],
    ),
  );
}

/**
 * Filter tips callback for the transliteration filter.
 *
 * @param $filter
 *   Not used.
 * @param $format
 *   Not used.
 * @param $long
 *   Not used; the same tip is returned for short and long display.
 *
 * @return
 *   The translated filter tip.
 */
function _transliteration_filter_tips($filter, $format, $long) {
  return t('Non-latin text (e.g., å, ö, 漢) will be converted to US-ASCII equivalents (a, o, ?).');
}