123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- <?php
/**
 * Processor for removing stopwords from index and search terms.
 *
 * Stopwords can be configured through a file URI, through a textarea in the
 * configuration form, or both; the two lists are merged. Words removed while
 * preprocessing a search query are collected and reported back in the
 * 'ignored' key of the search response.
 */
class SearchApiStopWords extends SearchApiAbstractProcessor {

  /**
   * Holds all words ignored for the last query.
   *
   * @var array
   */
  protected $ignored = array();

  /**
   * Lookup table of stopwords, keyed by word.
   *
   * Built lazily by getStopWords(); NULL until the first call.
   *
   * @var array|null
   */
  protected $stopwords;

  /**
   * Builds the configuration form for this processor.
   *
   * Adds a help text, a text field for the stopwords file URI and a textarea
   * for stopwords entered directly to the parent's form. Previously saved
   * options, where present, are used as default values.
   *
   * @return array
   *   The form array.
   */
  public function configurationForm() {
    $form = parent::configurationForm();

    // "+=" only adds keys the parent form did not already define.
    $form += array(
      'help' => array(
        '#markup' => '<p>' . t('Provide a stopwords file or enter the words in this form. If you do both, both will be used. Read about !stopwords.', array('!stopwords' => l(t('stop words'), "http://en.wikipedia.org/wiki/Stop_words"))) . '</p>',
      ),
      'file' => array(
        '#type' => 'textfield',
        // A single, short label: it is also used as the prefix of the
        // validation error message for this element.
        '#title' => t('Stopwords file URI'),
        '#description' => t('This must be a stream-type description like <code>public://stopwords/stopwords.txt</code> or <code>http://example.com/stopwords.txt</code> or <code>private://stopwords.txt</code>.'),
      ),
      'stopwords' => array(
        '#type' => 'textarea',
        '#title' => t('Stopwords'),
        '#description' => t('Enter a space and/or linebreak separated list of stopwords that will be removed from content before it is indexed and from search terms before searching.'),
        '#default_value' => t("but\ndid\nthe this that those\netc"),
      ),
    );

    // Check each option on its own: the stored options may contain only one
    // of the two keys (or only keys owned by the parent class).
    if (!empty($this->options)) {
      if (isset($this->options['file'])) {
        $form['file']['#default_value'] = $this->options['file'];
      }
      if (isset($this->options['stopwords'])) {
        $form['stopwords']['#default_value'] = $this->options['stopwords'];
      }
    }

    return $form;
  }

  /**
   * Validates the submitted configuration.
   *
   * Requires at least one stopwords source and, if a file URI was entered,
   * requires that the file can be read.
   *
   * @param array $form
   *   The configuration form, as returned by configurationForm().
   * @param array $values
   *   The submitted values for this form.
   * @param array $form_state
   *   The complete form state.
   */
  public function configurationFormValidate(array $form, array &$values, array &$form_state) {
    parent::configurationFormValidate($form, $values, $form_state);

    $stopwords = isset($values['stopwords']) ? trim($values['stopwords']) : '';
    $uri = isset($values['file']) ? $values['file'] : '';

    if (empty($stopwords) && empty($uri)) {
      $el = $form['file'];
      form_error($el, $el['#title'] . ': ' . t('Either a stopwords file or a list of stopwords is required.'));
    }

    // Compare against FALSE so that a readable but empty file is not reported
    // as unreadable. The PHP warning is suppressed on purpose: the failure is
    // reported to the user through form_error() instead.
    if (!empty($uri) && @file_get_contents($uri) === FALSE) {
      $el = $form['file'];
      form_error($el, t('Stopwords file') . ': ' . t('The file %uri is not readable or does not exist.', array('%uri' => $uri)));
    }
  }

  /**
   * Removes all stopwords from a single text value.
   *
   * The value is split on whitespace, every word that is a configured
   * stopword is dropped and remembered in $this->ignored, and the remaining
   * words are joined with single spaces.
   *
   * @param mixed $value
   *   The value to process, passed by reference. Left untouched if it is not
   *   a string or if no stopwords are configured.
   */
  public function process(&$value) {
    $stopwords = $this->getStopWords();
    // Nothing to do without stopwords, and preg_split() below requires a
    // string: bail out if EITHER precondition fails.
    if (empty($stopwords) || !is_string($value)) {
      return;
    }

    $words = preg_split('/\s+/', $value);
    foreach ($words as $sub_key => $sub_value) {
      if (isset($stopwords[$sub_value])) {
        unset($words[$sub_key]);
        $this->ignored[] = $sub_value;
      }
    }
    $value = implode(' ', $words);
  }

  /**
   * Resets the list of ignored words, then lets the parent preprocess.
   *
   * @param SearchApiQuery $query
   *   The query about to be executed.
   */
  public function preprocessSearchQuery(SearchApiQuery $query) {
    // Start from a clean slate so words ignored for an earlier query are not
    // reported again.
    $this->ignored = array();
    parent::preprocessSearchQuery($query);
  }

  /**
   * Adds the words ignored for the last query to the search response.
   *
   * @param array $response
   *   The search response, passed by reference. Ignored words are appended
   *   to (or stored in) its 'ignored' key.
   * @param SearchApiQuery $query
   *   The executed query. Not used by this method.
   */
  public function postprocessSearchResults(array &$response, SearchApiQuery $query) {
    if ($this->ignored) {
      if (isset($response['ignored'])) {
        $response['ignored'] = array_merge($response['ignored'], $this->ignored);
      }
      else {
        $response['ignored'] = $this->ignored;
      }
    }
  }

  /**
   * Returns the configured stopwords as a lookup table.
   *
   * The table is built once per object from the stopwords file and the
   * stopwords entered in the form, and then reused.
   *
   * @return array
   *   An array whose keys are the stopwords set in either the file or the text
   *   field.
   */
  protected function getStopWords() {
    if (isset($this->stopwords)) {
      return $this->stopwords;
    }

    $file_words = $form_words = array();

    // PREG_SPLIT_NO_EMPTY keeps leading/trailing whitespace (such as the
    // newline that usually ends a file) from registering the empty string as
    // a stopword.
    if (!empty($this->options['file'])) {
      $stopwords_file = file_get_contents($this->options['file']);
      if (is_string($stopwords_file)) {
        $file_words = preg_split('/\s+/', $stopwords_file, -1, PREG_SPLIT_NO_EMPTY);
      }
    }
    if (!empty($this->options['stopwords'])) {
      $form_words = preg_split('/\s+/', $this->options['stopwords'], -1, PREG_SPLIT_NO_EMPTY);
    }

    // Flip the word list so that membership can be tested with isset().
    $this->stopwords = array_flip(array_merge($file_words, $form_words));
    return $this->stopwords;
  }

}
|