processor_tokenizer.inc 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. <?php
  /**
   * Processor for tokenizing fulltext data by replacing (configurable)
   * non-letters with spaces.
   *
   * Two user-configurable PCRE fragments drive the processor:
   * - "spaces": characters regarded as word delimiters.
   * - "ignorable": characters removed from the text altogether.
   *
   * Every place that uses one of these fragments wraps it as
   * "/(FRAGMENT)+/u" (see buildPattern()), so the expression that is validated
   * in the configuration form is exactly the one applied later.
   */
  class SearchApiTokenizer extends SearchApiAbstractProcessor {

    /**
     * The delimiter-escaped "spaces" fragment, set lazily by prepare().
     *
     * @var string
     */
    protected $spaces;

    /**
     * The delimiter-escaped "ignorable" fragment, set lazily by prepare().
     *
     * @var string
     */
    protected $ignorable;

    /**
     * Adds the "spaces" and "ignorable" text fields to the parent's form.
     *
     * Previously stored option values, where present, replace the built-in
     * defaults.
     *
     * @return array
     *   The form array returned by the parent, extended by the two text fields.
     */
    public function configurationForm() {
      $form = parent::configurationForm();
      $form += array(
        'spaces' => array(
          '#type' => 'textfield',
          '#title' => t('Whitespace characters'),
          '#description' => t('Specify the characters that should be regarded as whitespace and therefore used as word-delimiters. ' .
            'Specify the characters as a <a href="@link">PCRE character class</a>. ' .
            'Note: For non-English content, the default setting might not be suitable.',
            array('@link' => url('http://www.php.net/manual/en/regexp.reference.character-classes.php'))),
          '#default_value' => "[^[:alnum:]]",
        ),
        'ignorable' => array(
          '#type' => 'textfield',
          '#title' => t('Ignorable characters'),
          '#description' => t('Specify characters which should be removed from fulltext fields and search strings (e.g., "-"). The same format as above is used.'),
          '#default_value' => "[']",
        ),
      );

      // Check each key individually: the options array may be non-empty
      // without containing both of our settings.
      foreach (array('spaces', 'ignorable') as $key) {
        if (isset($this->options[$key])) {
          $form[$key]['#default_value'] = $this->options[$key];
        }
      }

      return $form;
    }

    /**
     * Checks that both submitted fragments compile as regular expressions.
     *
     * @param array $form
     *   The configuration form; the 'spaces' and 'ignorable' elements are used
     *   as error targets.
     * @param array $values
     *   The submitted values; 'spaces' and 'ignorable' are read.
     * @param array $form_state
     *   The form state, passed on to the parent implementation.
     */
    public function configurationFormValidate(array $form, array &$values, array &$form_state) {
      parent::configurationFormValidate($form, $values, $form_state);

      foreach (array('spaces', 'ignorable') as $key) {
        $fragment = $this->escapeDelimiter(isset($values[$key]) ? $values[$key] : '');
        // preg_match() returns FALSE (and emits a warning, suppressed here)
        // when the pattern cannot be compiled.
        if (@preg_match($this->buildPattern($fragment), '') === FALSE) {
          $el = $form[$key];
          form_error($el, $el['#title'] . ': ' . t('The entered text is no valid regular expression.'));
        }
      }
    }

    /**
     * Strips ignorable characters from a value and splits it into tokens.
     *
     * If splitting on the "spaces" pattern yields more than one part, $value is
     * replaced by a list of array('value' => PART) entries. Otherwise it stays
     * a string. Empty parts (e.g. from a trailing delimiter) are kept exactly
     * as the split produces them.
     *
     * @param mixed $value
     *   The value to process, changed in place.
     */
    protected function processFieldValue(&$value) {
      $this->prepare();

      if ((string) $this->ignorable !== '') {
        $stripped = preg_replace($this->buildPattern($this->ignorable), '', $value);
        // preg_replace() yields NULL on failure (e.g. invalid UTF-8 with the
        // "u" modifier); keep the original value instead of wiping it.
        if ($stripped !== NULL) {
          $value = $stripped;
        }
      }

      if ((string) $this->spaces !== '') {
        $parts = preg_split($this->buildPattern($this->spaces), $value);
        // preg_split() yields FALSE on failure; leave the value untouched then.
        if ($parts !== FALSE && count($parts) > 1) {
          $value = array();
          foreach ($parts as $token) {
            $value[] = array('value' => $token);
          }
        }
      }
    }

    /**
     * Strips ignorable characters and collapses delimiter runs to one space.
     *
     * Unlike processFieldValue(), the value is not split into tokens.
     *
     * @param mixed $value
     *   The value to process, changed in place.
     */
    protected function process(&$value) {
      $this->prepare();

      if ((string) $this->ignorable !== '') {
        $stripped = preg_replace($this->buildPattern($this->ignorable), '', $value);
        // NULL signals a PCRE failure; do not overwrite the value with it.
        if ($stripped !== NULL) {
          $value = $stripped;
        }
      }

      if ((string) $this->spaces !== '') {
        $spaced = preg_replace($this->buildPattern($this->spaces), ' ', $value);
        if ($spaced !== NULL) {
          $value = $spaced;
        }
      }
    }

    /**
     * Lazily initializes $spaces and $ignorable from the stored options.
     *
     * Runs only while $spaces is unset. A missing option results in an empty
     * fragment, which disables the corresponding processing step.
     */
    protected function prepare() {
      if (!isset($this->spaces)) {
        $this->spaces = $this->escapeDelimiter(isset($this->options['spaces']) ? $this->options['spaces'] : '');
        $this->ignorable = $this->escapeDelimiter(isset($this->options['ignorable']) ? $this->options['ignorable'] : '');
      }
    }

    /**
     * Escapes the "/" pattern delimiter inside a user-supplied fragment.
     *
     * NOTE(review): a fragment that already contains "\/" ends up as "\\/",
     * which leaves the slash unescaped; this mirrors the original behavior.
     *
     * @param string $fragment
     *   The raw PCRE fragment.
     *
     * @return string
     *   The fragment with every "/" replaced by "\/".
     */
    protected function escapeDelimiter($fragment) {
      return str_replace('/', '\/', (string) $fragment);
    }

    /**
     * Wraps an escaped fragment into the complete expression used everywhere.
     *
     * @param string $fragment
     *   A fragment as returned by escapeDelimiter().
     *
     * @return string
     *   The pattern "/(FRAGMENT)+/u": one or more consecutive matches, with
     *   pattern and subject treated as UTF-8.
     */
    protected function buildPattern($fragment) {
      return '/(' . $fragment . ')+/u';
    }

  }