<?php

/**
 * @file
 * Converts non-latin text to US-ASCII and sanitizes file names.
 *
 * @author Stefan M. Kudwien (http://drupal.org/user/48898)
 */
/**
 * Implements hook_menu().
 *
 * Declares two local tasks below the file system configuration path: a
 * default "Settings" tab and a "Transliteration" tab that renders the
 * transliteration_retroactive form from transliteration.admin.inc.
 */
function transliteration_menu() {
  $base_path = 'admin/config/media/file-system';

  // Default tab; its 'file path' is the directory of the system module.
  $settings_tab = array(
    'title' => 'Settings',
    'file path' => drupal_get_path('module', 'system'),
    'weight' => -10,
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );

  // Secondary tab that serves the retroactive transliteration form.
  $transliteration_tab = array(
    'title' => 'Transliteration',
    'description' => 'Convert existing file names to US-ASCII.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('transliteration_retroactive'),
    'access arguments' => array('administer site configuration'),
    'file' => 'transliteration.admin.inc',
    'weight' => 10,
    'type' => MENU_LOCAL_TASK,
  );

  return array(
    $base_path . '/settings' => $settings_tab,
    $base_path . '/transliteration' => $transliteration_tab,
  );
}
/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Appends a "Transliteration" section with two checkboxes to the file system
 * configuration form: one toggling transliteration of uploaded file names and
 * one toggling lowercasing of the transliterated names.
 */
function transliteration_form_system_file_system_settings_alter(&$form, &$form_state) {
  // Container element that groups the two settings under a common title.
  $section = array(
    '#type' => 'item',
    '#title' => t('Transliteration'),
    '#value' => '',
  );

  $section['transliteration_file_uploads'] = array(
    '#type' => 'checkbox',
    '#title' => t('Transliterate file names during upload.'),
    '#description' => t('Enable to convert file names to US-ASCII character set for cross-platform compatibility.'),
    '#default_value' => variable_get('transliteration_file_uploads', TRUE),
  );

  $section['transliteration_file_lowercase'] = array(
    '#type' => 'checkbox',
    '#title' => t('Lowercase transliterated file names.'),
    '#default_value' => variable_get('transliteration_file_lowercase', TRUE),
    '#description' => t('This is a recommended setting to prevent issues with case-insensitive file systems. It has no effect if transliteration has been disabled.'),
  );

  $form['transliteration'] = $section;

  // NOTE(review): Drupal 7's system_settings_form() appears to keep its
  // buttons under $form['actions'], so this 'buttons' key may be a Drupal 6
  // holdover - confirm before relying on it for ordering.
  $form['buttons']['#weight'] = 1;
}
/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Appends a "Transliteration" fieldset to the search settings form holding a
 * single checkbox backed by the 'transliteration_search' variable.
 */
function transliteration_form_search_admin_settings_alter(&$form, &$form_state) {
  $form['transliteration'] = array(
    '#type' => 'fieldset',
    '#title' => t('Transliteration'),
    // Checkbox nested directly inside the fieldset.
    'transliteration_search' => array(
      '#type' => 'checkbox',
      '#title' => t('Transliterate search index and searched strings.'),
      '#description' => t('Enable to allow searching and indexing using US-ASCII character set, i.e. to treat accented and unaccented letters the same.'),
      '#default_value' => variable_get('transliteration_search', TRUE),
    ),
  );
}
/**
 * Transliterates and sanitizes a file name.
 *
 * The resulting file name has white space replaced with underscores, consists
 * of only US-ASCII letters, digits, underscores, dots and hyphens, and is
 * converted to lowercase (if configured). If multiple files have been
 * submitted as an array, the names will be processed recursively and the
 * array keys are preserved.
 *
 * @param $filename
 *   A file name, or an array of file names.
 * @param $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input and
 *   is used to apply language-specific variations. If the source language is
 *   not known at the time of transliteration, it is recommended to set this
 *   argument to the site default language to produce consistent results.
 *   Otherwise the current display language will be used.
 *
 * @return
 *   Sanitized file name, or array of sanitized file names.
 *
 * @see language_default()
 */
function transliteration_clean_filename($filename, $source_langcode = NULL) {
  if (is_array($filename)) {
    foreach ($filename as $key => $value) {
      $filename[$key] = transliteration_clean_filename($value, $source_langcode);
    }
    return $filename;
  }

  // Characters without a known equivalent are dropped (empty placeholder).
  $filename = transliteration_get($filename, '', $source_langcode);
  // Replace every whitespace character, not only the plain space, so that
  // tabs and line breaks become separators instead of being stripped below
  // and fusing the neighbouring words together.
  $filename = preg_replace('/\s/', '_', $filename);
  // Remove remaining unsafe characters.
  $filename = preg_replace('![^0-9A-Za-z_.-]!', '', $filename);
  // Collapse each run of the same separator (_, . or -) into a single one.
  $filename = preg_replace('/(_)_+|(\.)\.+|(-)-+/', '\\1\\2\\3', $filename);
  // Force lowercase to prevent issues on case-insensitive file systems.
  if (variable_get('transliteration_file_lowercase', TRUE)) {
    $filename = strtolower($filename);
  }
  return $filename;
}
/**
 * Transliterates text.
 *
 * Accepts a string in any language and attempts to express it using US-ASCII
 * characters only, rendering in Roman letters the pronunciation of text that
 * is written in another writing system. The work is delegated to
 * _transliteration_process(), whose include file is loaded on demand.
 *
 * @param $text
 *   UTF-8 encoded text input.
 * @param $unknown
 *   Replacement string for characters that do not have a suitable ASCII
 *   equivalent.
 * @param $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input and
 *   is used to apply language-specific variations. If the source language is
 *   not known at the time of transliteration, it is recommended to set this
 *   argument to the site default language to produce consistent results.
 *   Otherwise the current display language will be used.
 *
 * @return
 *   Transliterated text.
 *
 * @see language_default()
 */
function transliteration_get($text, $unknown = '?', $source_langcode = NULL) {
  // Only pull in transliteration.inc when the worker is not yet defined.
  $worker_available = function_exists('_transliteration_process');
  if (!$worker_available) {
    module_load_include('inc', 'transliteration');
  }

  $transliterated = _transliteration_process($text, $unknown, $source_langcode);
  return $transliterated;
}
/**
 * Implements hook_init().
 *
 * Sanitizes file names during upload. For every entry of
 * $_FILES['files']['name'] the unaltered name is copied to the parallel
 * 'orig_name' key before the name itself is replaced by its transliterated
 * form. Nothing happens when no files were submitted or when the
 * 'transliteration_file_uploads' variable is disabled.
 */
function transliteration_init() {
  if (empty($_FILES['files']) || !variable_get('transliteration_file_uploads', TRUE)) {
    return;
  }
  if (!isset($_FILES['files']['name'])) {
    return;
  }

  // Figure out language, which is available in $_POST['language'] for node
  // forms. The value is request data and may be an array (language[]=...);
  // only a string can be used as an array offset, anything else would raise
  // a TypeError on PHP 8 in the isset() lookup below.
  $langcode = NULL;
  if (!empty($_POST['language']) && is_string($_POST['language'])) {
    $languages = language_list();
    if (isset($languages[$_POST['language']])) {
      $langcode = $_POST['language'];
    }
  }

  $names = $_FILES['files']['name'];
  if (is_array($names)) {
    foreach ($names as $field => $filename) {
      // Keep a copy of the unaltered file name.
      $_FILES['files']['orig_name'][$field] = $filename;
      $_FILES['files']['name'][$field] = transliteration_clean_filename($filename, $langcode);
    }
  }
  elseif (is_string($names)) {
    // A file input named plain "files" yields a scalar name; iterating it
    // would emit a warning and leave the name unsanitized.
    $_FILES['files']['orig_name'] = $names;
    $_FILES['files']['name'] = transliteration_clean_filename($names, $langcode);
  }
}
/**
 * Implements hook_search_preprocess().
 *
 * When the 'transliteration_search' variable is enabled, text headed for the
 * search index and user submitted search keywords are transliterated using
 * the site default language; otherwise the text is returned untouched.
 */
function transliteration_search_preprocess($text) {
  if (!variable_get('transliteration_search', TRUE)) {
    return $text;
  }

  // Characters without an ASCII equivalent are dropped (empty placeholder).
  $default_langcode = language_default('language');
  return transliteration_get($text, '', $default_langcode);
}
/**
 * Implements hook_filter_info().
 *
 * Declares the 'transliteration' text filter together with its process,
 * settings and tips callbacks and its default placeholder setting.
 */
function transliteration_filter_info() {
  $filters = array();

  $filters['transliteration'] = array(
    'title' => t('Convert all characters to US-ASCII'),
    'process callback' => '_transliteration_filter_process',
    'settings callback' => '_transliteration_filter_settings',
    // Placeholder used for characters without a known transliteration.
    'default settings' => array(
      'no_known_transliteration' => '?',
    ),
    'tips callback' => '_transliteration_filter_tips',
  );

  return $filters;
}
/**
 * Process callback for the transliteration filter.
 *
 * Transliterates the filtered text, substituting the filter's configured
 * 'no_known_transliteration' setting for characters that have no equivalent.
 */
function _transliteration_filter_process($text, $filter, $format, $langcode, $cache, $cache_id) {
  $placeholder = $filter->settings['no_known_transliteration'];
  return transliteration_get($text, $placeholder, $langcode);
}
/**
 * Settings callback for the transliteration filter.
 *
 * Returns a single text field in which the placeholder for characters without
 * a known US-ASCII equivalent can be configured.
 */
function _transliteration_filter_settings($form, &$form_state, $filter, $format, $defaults, $filters) {
  // Fill in any setting the filter does not carry yet from the defaults.
  $filter->settings += $defaults;

  $elements = array();
  $elements['no_known_transliteration'] = array(
    '#type' => 'textfield',
    '#title' => t('Placeholder for characters with no known US-ASCII equivalent'),
    '#size' => 2,
    // The maximum length is 5 in order to accommodate unicode multibyte input.
    '#maxlength' => 5,
    '#default_value' => $filter->settings['no_known_transliteration'],
  );

  return $elements;
}
/**
 * Filter tips callback for the transliteration filter.
 *
 * Returns the same translated one-line tip regardless of the arguments.
 */
function _transliteration_filter_tips($filter, $format, $long) {
  $tip = t('Non-latin text (e.g., å, ö, 漢) will be converted to US-ASCII equivalents (a, o, ?).');
  return $tip;
}
|