transliteration.module 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. <?php
  2. /**
  3. * @file
  4. * Converts non-latin text to US-ASCII and sanitizes file names.
  5. *
  6. * @author Stefan M. Kudwien (http://drupal.org/user/48898)
  7. */
  8. /**
  9. * Implements hook_menu().
  10. */
  11. function transliteration_menu() {
  12. $items['admin/config/media/file-system/settings'] = array(
  13. 'title' => 'Settings',
  14. 'file path' => drupal_get_path('module', 'system'),
  15. 'weight' => -10,
  16. 'type' => MENU_DEFAULT_LOCAL_TASK,
  17. );
  18. $items['admin/config/media/file-system/transliteration'] = array(
  19. 'title' => 'Transliteration',
  20. 'description' => 'Convert existing file names to US-ASCII.',
  21. 'page callback' => 'drupal_get_form',
  22. 'page arguments' => array('transliteration_retroactive'),
  23. 'access arguments' => array('administer site configuration'),
  24. 'file' => 'transliteration.admin.inc',
  25. 'weight' => 10,
  26. 'type' => MENU_LOCAL_TASK,
  27. );
  28. return $items;
  29. }
  30. /**
  31. * Implements hook_form_FORM_ID_alter().
  32. *
  33. * Adds transliteration settings to the file system configuration form.
  34. */
  35. function transliteration_form_system_file_system_settings_alter(&$form, &$form_state) {
  36. $form['transliteration'] = array(
  37. '#type' => 'item',
  38. '#title' => t('Transliteration'),
  39. '#value' => '',
  40. );
  41. $form['transliteration']['transliteration_file_uploads'] = array(
  42. '#type' => 'checkbox',
  43. '#title' => t('Transliterate file names during upload.'),
  44. '#description' => t('Enable to convert file names to US-ASCII character set for cross-platform compatibility.'),
  45. '#default_value' => variable_get('transliteration_file_uploads', TRUE),
  46. );
  47. $form['transliteration']['transliteration_file_uploads_display_name'] = array(
  48. '#type' => 'checkbox',
  49. '#title' => t('Transliterate the displayed file name.'),
  50. '#description' => t('Enable to also convert the file name that is displayed within the site (for example, in link text).'),
  51. '#default_value' => variable_get('transliteration_file_uploads_display_name', TRUE),
  52. '#states' => array(
  53. 'invisible' => array(
  54. 'input[name="transliteration_file_uploads"]' => array('checked' => FALSE),
  55. ),
  56. ),
  57. );
  58. $form['transliteration']['transliteration_file_lowercase'] = array(
  59. '#type' => 'checkbox',
  60. '#title' => t('Lowercase transliterated file names.'),
  61. '#default_value' => variable_get('transliteration_file_lowercase', TRUE),
  62. '#description' => t('This is a recommended setting to prevent issues with case-insensitive file systems. It has no effect if transliteration has been disabled.'),
  63. '#states' => array(
  64. 'invisible' => array(
  65. 'input[name="transliteration_file_uploads"]' => array('checked' => FALSE),
  66. ),
  67. ),
  68. );
  69. $form['buttons']['#weight'] = 1;
  70. }
  71. /**
  72. * Implements hook_form_FORM_ID_alter().
  73. *
  74. * Adds transliteration settings to the search settings form.
  75. */
  76. function transliteration_form_search_admin_settings_alter(&$form, &$form_state) {
  77. $form['transliteration'] = array(
  78. '#type' => 'fieldset',
  79. '#title' => t('Transliteration'),
  80. );
  81. $form['transliteration']['transliteration_search'] = array(
  82. '#type' => 'checkbox',
  83. '#title' => t('Transliterate search index and searched strings.'),
  84. '#description' => t('Enable to allow searching and indexing using US-ASCII character set, i.e. to treat accented and unaccented letters the same.'),
  85. '#default_value' => variable_get('transliteration_search', TRUE),
  86. );
  87. }
  88. /**
  89. * Transliterates and sanitizes a file name.
  90. *
  91. * The resulting file name has white space replaced with underscores, consists
  92. * of only US-ASCII characters, and is converted to lowercase (if configured).
  93. * If multiple files have been submitted as an array, the names will be
  94. * processed recursively.
  95. *
  96. * @param $filename
  97. * A file name, or an array of file names.
  98. * @param $source_langcode
  99. * Optional ISO 639 language code that denotes the language of the input and
  100. * is used to apply language-specific variations. If the source language is
  101. * not known at the time of transliteration, it is recommended to set this
  102. * argument to the site default language to produce consistent results.
  103. * Otherwise the current display language will be used.
  104. * @return
  105. * Sanitized file name, or array of sanitized file names.
  106. *
  107. * @see language_default()
  108. */
  109. function transliteration_clean_filename($filename, $source_langcode = NULL) {
  110. if (is_array($filename)) {
  111. foreach ($filename as $key => $value) {
  112. $filename[$key] = transliteration_clean_filename($value, $source_langcode);
  113. }
  114. return $filename;
  115. }
  116. // Allow other modules to alter the filename prior to processing.
  117. drupal_alter('transliteration_clean_filename_prepare', $filename, $source_langcode);
  118. $filename = transliteration_get($filename, '', $source_langcode);
  119. // Replace whitespace.
  120. $filename = str_replace(' ', '_', $filename);
  121. // Remove remaining unsafe characters.
  122. $filename = preg_replace('![^0-9A-Za-z_.-]!', '', $filename);
  123. // Remove multiple consecutive non-alphabetical characters.
  124. $filename = preg_replace('/(_)_+|(\.)\.+|(-)-+/', '\\1\\2\\3', $filename);
  125. // Force lowercase to prevent issues on case-insensitive file systems.
  126. if (variable_get('transliteration_file_lowercase', TRUE)) {
  127. $filename = strtolower($filename);
  128. }
  129. return $filename;
  130. }
  131. /**
  132. * Transliterates text.
  133. *
  134. * Takes an input string in any language and character set, and tries to
  135. * represent it in US-ASCII characters by conveying, in Roman letters, the
  136. * pronunciation expressed by the text in some other writing system.
  137. *
  138. * @param $text
  139. * UTF-8 encoded text input.
  140. * @param $unknown
  141. * Replacement string for characters that do not have a suitable ASCII
  142. * equivalent.
  143. * @param $source_langcode
  144. * Optional ISO 639 language code that denotes the language of the input and
  145. * is used to apply language-specific variations. If the source language is
  146. * not known at the time of transliteration, it is recommended to set this
  147. * argument to the site default language to produce consistent results.
  148. * Otherwise the current display language will be used.
  149. * @return
  150. * Transliterated text.
  151. *
  152. * @see language_default()
  153. */
  154. function transliteration_get($text, $unknown = '?', $source_langcode = NULL) {
  155. if (!function_exists('_transliteration_process')) {
  156. module_load_include('inc', 'transliteration');
  157. }
  158. return _transliteration_process($text, $unknown, $source_langcode);
  159. }
  160. /**
  161. * Implements hook_init().
  162. *
  163. * Sanitizes file names during upload.
  164. */
  165. function transliteration_init() {
  166. if (!empty($_FILES['files']) && variable_get('transliteration_file_uploads', TRUE)) {
  167. // Figure out language, which is available in $_POST['language'] for node
  168. // forms.
  169. $langcode = NULL;
  170. if (!empty($_POST['language'])) {
  171. $languages = language_list();
  172. if (isset($languages[$_POST['language']])) {
  173. $langcode = $_POST['language'];
  174. }
  175. }
  176. foreach ($_FILES['files']['name'] as $field => $filename) {
  177. // Keep a copy of the unaltered file name.
  178. $_FILES['files']['orig_name'][$field] = $filename;
  179. $_FILES['files']['name'][$field] = transliteration_clean_filename($filename, $langcode);
  180. }
  181. }
  182. }
  183. /**
  184. * Implements hook_file_presave().
  185. */
  186. function transliteration_file_presave($file) {
  187. // If an uploaded file had its name altered in transliteration_init() and if
  188. // the human-readable display name is not being transliterated, restore the
  189. // original version as the human-readable name before saving. (The
  190. // transliterated version will still be used in the file URI, which is the
  191. // only place where it matters.)
  192. if (!empty($_FILES['files']['name']) && variable_get('transliteration_file_uploads', TRUE) && !variable_get('transliteration_file_uploads_display_name', TRUE)) {
  193. $key = array_search($file->filename, $_FILES['files']['name']);
  194. if ($key !== FALSE && isset($_FILES['files']['orig_name'][$key])) {
  195. $file->filename = $_FILES['files']['orig_name'][$key];
  196. }
  197. }
  198. }
  199. /**
  200. * Implements hook_search_preprocess().
  201. *
  202. * Transliterates text added to the search index and user submitted search
  203. * keywords.
  204. */
  205. function transliteration_search_preprocess($text) {
  206. if (variable_get('transliteration_search', TRUE)) {
  207. return transliteration_get($text, '', language_default('language'));
  208. }
  209. return $text;
  210. }
  211. /**
  212. * Implements hook_filter_info().
  213. */
  214. function transliteration_filter_info() {
  215. return array(
  216. 'transliteration' => array(
  217. 'title' => t('Convert all characters to US-ASCII'),
  218. 'process callback' => '_transliteration_filter_process',
  219. 'settings callback' => '_transliteration_filter_settings',
  220. 'default settings' => array(
  221. 'no_known_transliteration' => '?'
  222. ),
  223. 'tips callback' => '_transliteration_filter_tips',
  224. ),
  225. );
  226. }
  227. /**
  228. * Process callback for the transliteration filter.
  229. */
  230. function _transliteration_filter_process($text, $filter, $format, $langcode, $cache, $cache_id) {
  231. return transliteration_get($text, $filter->settings['no_known_transliteration'], $langcode);
  232. }
  233. /**
  234. * Settings callback for the transliteration filter.
  235. */
  236. function _transliteration_filter_settings($form, &$form_state, $filter, $format, $defaults, $filters) {
  237. $filter->settings += $defaults;
  238. return array(
  239. 'no_known_transliteration' => array(
  240. '#type' => 'textfield',
  241. '#title' => t('Placeholder for characters with no known US-ASCII equivalent'),
  242. '#size' => 2,
  243. // The maximum length is 5 in order to accommodate unicode multibyte input.
  244. '#maxlength' => 5,
  245. '#default_value' => $filter->settings['no_known_transliteration'],
  246. )
  247. );
  248. }
  249. /**
  250. * Filter tips callback for the transliteration filter.
  251. */
  252. function _transliteration_filter_tips($filter, $format, $long) {
  253. return t('Non-latin text (e.g., å, ö, 漢) will be converted to US-ASCII equivalents (a, o, ?).');
  254. }