options += array(
      'prefix' => '',
      'suffix' => '',
      'excerpt' => TRUE,
      'excerpt_length' => 256,
      'highlight' => 'always',
      'highlight_partial' => FALSE,
      'exclude_fields' => array(),
    );

    $form['prefix'] = array(
      '#type' => 'textfield',
      '#title' => t('Highlighting prefix'),
      '#description' => t('Text/HTML that will be prepended to all occurrences of search keywords in highlighted text.'),
      '#default_value' => $this->options['prefix'],
    );
    $form['suffix'] = array(
      '#type' => 'textfield',
      '#title' => t('Highlighting suffix'),
      '#description' => t('Text/HTML that will be appended to all occurrences of search keywords in highlighted text.'),
      '#default_value' => $this->options['suffix'],
    );
    $form['excerpt'] = array(
      '#type' => 'checkbox',
      '#title' => t('Create excerpt'),
      '#description' => t('When enabled, an excerpt will be created for searches with keywords, containing all occurrences of keywords in a fulltext field.'),
      '#default_value' => $this->options['excerpt'],
    );
    $form['excerpt_length'] = array(
      '#type' => 'textfield',
      '#title' => t('Excerpt length'),
      '#description' => t('The requested length of the excerpt, in characters.'),
      '#default_value' => $this->options['excerpt_length'],
      '#element_validate' => array('element_validate_integer_positive'),
      '#states' => array(
        'visible' => array(
          '#edit-processors-search-api-highlighting-settings-excerpt' => array(
            'checked' => TRUE,
          ),
        ),
      ),
    );
    // Exclude certain fulltext fields.
    $fields = $this->index->getFields();
    $fulltext_fields = array();
    foreach ($this->index->getFulltextFields() as $field) {
      if (isset($fields[$field])) {
        $fulltext_fields[$field] = check_plain($fields[$field]['name'] . ' (' . $field . ')');
      }
    }
    $form['exclude_fields'] = array(
      '#type' => 'checkboxes',
      '#title' => t('Exclude fields from excerpt'),
      '#description' => t('Exclude certain fulltext fields from being displayed in the excerpt.'),
      '#options' => $fulltext_fields,
      '#default_value' => $this->options['exclude_fields'],
      '#attributes' => array('class' => array('search-api-checkboxes-list')),
    );
    $form['highlight'] = array(
      '#type' => 'select',
      '#title' => t('Highlight returned field data'),
      '#description' => t('Select whether returned fields should be highlighted.'),
      '#options' => array(
        'always' => t('Always'),
        'server' => t('If the server returns fields'),
        'never' => t('Never'),
      ),
      '#default_value' => $this->options['highlight'],
    );
    $form['highlight_partial'] = array(
      '#type' => 'checkbox',
      '#title' => t('Highlight partial matches'),
      '#description' => t('When enabled, matches in parts of words will be highlighted as well.'),
      '#default_value' => $this->options['highlight_partial'],
    );

    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function configurationFormValidate(array $form, array &$values, array &$form_state) {
    // Keep only the fields whose checkbox was actually ticked.
    $values['exclude_fields'] = array_filter($values['exclude_fields']);
  }

  /**
   * {@inheritdoc}
   */
  public function postprocessSearchResults(array &$response, SearchApiQuery $query) {
    if (empty($response['results']) || !($keys = $this->getKeywords($query))) {
      return;
    }

    $fulltext_fields = $this->index->getFulltextFields();
    if (!empty($this->options['exclude_fields'])) {
      $fulltext_fields = drupal_map_assoc($fulltext_fields);
      foreach ($this->options['exclude_fields'] as $field) {
        unset($fulltext_fields[$field]);
      }
    }

    foreach ($response['results'] as $id => &$result) {
      if ($this->options['excerpt']) {
        $text = array();
        $fields = $this->getFulltextFields($response['results'], $id, $fulltext_fields);
        foreach ($fields as $data) {
          if (is_array($data)) {
            $text = array_merge($text, $data);
          }
          else {
            $text[] = $data;
          }
        }
        $result['excerpt'] = $this->createExcerpt($this->flattenArrayValues($text), $keys);
      }
      if ($this->options['highlight'] != 'never') {
        // Only load the item for highlighting when configured to "always";
        // otherwise just use whatever fields are already in the result.
        $fields = $this->getFulltextFields($response['results'], $id, $fulltext_fields, $this->options['highlight'] == 'always');
        foreach ($fields as $field => $data) {
          $result['fields'][$field] = array('#sanitize_callback' => FALSE);
          if (is_array($data)) {
            foreach ($data as $i => $text) {
              $result['fields'][$field]['#value'][$i] = $this->highlightField($text, $keys);
            }
          }
          else {
            $result['fields'][$field]['#value'] = $this->highlightField($data, $keys);
          }
        }
      }
    }
    // Break the reference so that later use of $result cannot accidentally
    // overwrite the last entry of $response['results'].
    unset($result);
  }

  /**
   * Retrieves the fulltext data of a result.
   *
   * @param array $results
   *   All results returned in the search, by reference.
   * @param int|string $i
   *   The index in the results array of the result whose data should be
   *   returned.
   * @param array $fulltext_fields
   *   The fulltext fields from which the excerpt should be created.
   * @param bool $load
   *   TRUE if the item should be loaded if necessary, FALSE if only fields
   *   already returned in the results should be used.
   *
   * @return array
   *   An array containing fulltext field names mapped to the text data
   *   contained in them for the given result.
   */
  protected function getFulltextFields(array &$results, $i, array $fulltext_fields, $load = TRUE) {
    global $language;
    $data = array();

    $result = &$results[$i];
    // Act as if $load is TRUE if we have a loaded item.
    $load = $load || !empty($result['entity']);
    $result += array('fields' => array());
    // We only need detailed fields data if $load is TRUE.
    $fields = $load ? $this->index->getFields() : array();
    $needs_extraction = array();
    $returned_fields = search_api_get_sanitized_field_values(array_intersect_key($result['fields'], array_flip($fulltext_fields)));
    foreach ($fulltext_fields as $field) {
      if (array_key_exists($field, $returned_fields)) {
        $data[$field] = $returned_fields[$field];
      }
      // Skip fields the index has no definition for instead of triggering an
      // undefined-index notice and passing NULL on to the extraction.
      elseif ($load && isset($fields[$field])) {
        $needs_extraction[$field] = $fields[$field];
      }
    }

    if (!$needs_extraction) {
      return $data;
    }

    if (empty($result['entity'])) {
      // Load the items of all results at once and remember them, so
      // subsequent calls for other results do not have to load again.
      $items = $this->index->loadItems(array_keys($results));
      foreach ($items as $id => $item) {
        $results[$id]['entity'] = $item;
      }
    }
    // If we still don't have a loaded item, we should stop trying.
    if (empty($result['entity'])) {
      return $data;
    }
    $wrapper = $this->index->entityWrapper($result['entity'], FALSE);
    $wrapper->language($language->language);
    $extracted = search_api_extract_fields($wrapper, $needs_extraction, array('sanitize' => TRUE));

    foreach ($extracted as $field => $info) {
      if (isset($info['value'])) {
        $data[$field] = $info['value'];
      }
    }

    return $data;
  }

  /**
   * Extracts the positive keywords used in a search query.
   *
   * @param SearchApiQuery $query
   *   The query from which to extract the keywords.
   *
   * @return array
   *   An array of all unique positive keywords used in the query.
   */
  protected function getKeywords(SearchApiQuery $query) {
    $keys = $query->getKeys();
    if (!$keys) {
      return array();
    }
    if (is_array($keys)) {
      return $this->flattenKeysArray($keys);
    }

    $keywords = preg_split(self::$split, $keys);
    // Assure there are no duplicates. (This is actually faster than
    // array_unique() by a factor of 3 to 4.)
    $keywords = drupal_map_assoc(array_filter($keywords));
    // Remove quotes from keywords.
    foreach ($keywords as $key) {
      $keywords[$key] = trim($key, "'\"");
    }
    return drupal_map_assoc(array_filter($keywords));
  }

  /**
   * Extracts the positive keywords from a keys array.
   *
   * @param array $keys
   *   A search keys array, as specified by SearchApiQueryInterface::getKeys().
   *
   * @return array
   *   An array of all unique positive keywords contained in the keys.
   */
  protected function flattenKeysArray(array $keys) {
    // Negated (sub-)expressions contribute no positive keywords.
    if (!empty($keys['#negation'])) {
      return array();
    }

    $keywords = array();
    foreach ($keys as $i => $key) {
      if (!element_child($i)) {
        continue;
      }
      if (is_array($key)) {
        $keywords += $this->flattenKeysArray($key);
      }
      else {
        $keywords[$key] = $key;
      }
    }

    return $keywords;
  }

  /**
   * Returns snippets from a piece of text, with certain keywords highlighted.
   *
   * Largely copied from search_excerpt().
   *
   * @param string $text
   *   The text to extract fragments from.
   * @param array $keys
   *   Search keywords entered by the user.
   *
   * @return string|null
   *   A string containing HTML for the excerpt, or NULL if none could be
   *   created.
   */
  protected function createExcerpt($text, array $keys) {
    // Prepare text by stripping HTML tags and decoding HTML entities.
    $text = strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text));
    $text = decode_entities($text);
    $text = preg_replace('/\s+/', ' ', $text);
    $text = trim($text, ' ');
    $text_length = strlen($text);

    // Try to reach the requested excerpt length with about two fragments (each
    // with a keyword and some context).
    $ranges = array();
    $length = 0;
    $look_start = array();
    $remaining_keys = $keys;

    // Get the set excerpt length from the configuration. If the length is too
    // small, only use one fragment.
    $excerpt_length = $this->options['excerpt_length'];
    $context_length = round($excerpt_length / 4) - 3;
    if ($context_length < 32) {
      $context_length = round($excerpt_length / 2) - 1;
    }

    while ($length < $excerpt_length && !empty($remaining_keys)) {
      $found_keys = array();
      foreach ($remaining_keys as $key) {
        if ($length >= $excerpt_length) {
          break;
        }

        // Remember where we last found $key, in case we are coming through a
        // second time.
        if (!isset($look_start[$key])) {
          $look_start[$key] = 0;
        }

        // See if we can find $key after where we found it the last time. Since
        // we are requiring a match on a word boundary, make sure $text starts
        // and ends with a space.
        $matches = array();
        if (empty($this->options['highlight_partial'])) {
          $found_position = FALSE;
          $regex = '/' . static::$boundary . preg_quote($key, '/') . static::$boundary . '/iu';
          // NOTE(review): the offset reported here is relative to the padded
          // subject (' ' . $text . ' '), not to $text itself — confirm that
          // the resulting one-byte shift in the context window is intended.
          if (preg_match($regex, ' ' . $text . ' ', $matches, PREG_OFFSET_CAPTURE, $look_start[$key])) {
            $found_position = $matches[0][1];
          }
        }
        else {
          $found_position = stripos($text, $key, $look_start[$key]);
        }
        if ($found_position !== FALSE) {
          $look_start[$key] = $found_position + 1;
          // Keep track of which keys we found this time, in case we need to
          // pass through again to find more text.
          $found_keys[] = $key;

          // Locate a space before and after this match, leaving some context on
          // each end.
          if ($found_position > $context_length) {
            $before = strpos($text, ' ', $found_position - $context_length);
            if ($before !== FALSE) {
              ++$before;
            }
          }
          else {
            $before = 0;
          }
          if ($before !== FALSE && $before <= $found_position) {
            if ($text_length > $found_position + $context_length) {
              $after = strrpos(substr($text, 0, $found_position + $context_length), ' ', $found_position);
            }
            else {
              $after = $text_length;
            }
            if ($after !== FALSE && $after > $found_position) {
              if ($before < $after) {
                // Save this range.
                $ranges[$before] = $after;
                $length += $after - $before;
              }
            }
          }
        }
      }
      // Next time through this loop, only look for keys we found this time,
      // if any.
      $remaining_keys = $found_keys;
    }

    if (!$ranges) {
      // We didn't find any keyword matches, return NULL.
      return NULL;
    }

    // Sort the text ranges by starting position.
    ksort($ranges);

    // Collapse overlapping text ranges into one. The sorting makes it O(n).
    $newranges = array();
    $from1 = $to1 = NULL;
    foreach ($ranges as $from2 => $to2) {
      if ($from1 === NULL) {
        // This is the first time through this loop: initialize.
        $from1 = $from2;
        $to1 = $to2;
        continue;
      }
      if ($from2 <= $to1) {
        // The ranges overlap: combine them.
        $to1 = max($to1, $to2);
      }
      else {
        // The ranges do not overlap: save the working range and start a new
        // one.
        $newranges[$from1] = $to1;
        $from1 = $from2;
        $to1 = $to2;
      }
    }
    // Save the remaining working range.
    $newranges[$from1] = $to1;

    // Fetch text within the combined ranges we found.
    $out = array();
    foreach ($newranges as $from => $to) {
      $out[] = substr($text, $from, $to - $from);
    }
    if (!$out) {
      return NULL;
    }

    // Let translators have the ... separator text as one chunk.
    $dots = explode('!excerpt', t('... !excerpt ... !excerpt ...'));

    $text = (isset($newranges[0]) ? '' : $dots[0]) . implode($dots[1], $out) . $dots[2];
    $text = check_plain($text);

    // Since we stripped the tags at the beginning, highlighting doesn't need to
    // handle HTML anymore.
    return $this->highlightField($text, $keys, FALSE);
  }

  /**
   * Marks occurrences of the search keywords in a text field.
   *
   * @param string $text
   *   The text of the field.
   * @param array $keys
   *   Search keywords entered by the user.
   * @param bool $html
   *   Whether the text can contain HTML tags or not. In the former case, text
   *   inside tags (i.e., tag names and attributes) won't be highlighted.
   *
   * @return string
   *   The field's text with all occurrences of search keywords highlighted. If
   *   there are no keywords, or the text cannot be processed by the regular
   *   expression engine, the text is returned unchanged.
   */
  protected function highlightField($text, array $keys, $html = TRUE) {
    if (is_array($text)) {
      $text = $this->flattenArrayValues($text);
    }
    // Without keywords the alternation below would be empty and therefore
    // match (and wrap) every position in the text.
    if (!$keys) {
      return $text;
    }

    if ($html) {
      // Split the text into alternating "plain text" / "run of HTML tags"
      // parts. Quoted attribute values (of any length, in either quote style)
      // are consumed as a whole so a ">" inside them doesn't end the tag.
      $texts = preg_split('#((?:</?[[:alpha:]](?:[^>"\']*|"[^"]*"|\'[^\']*\')*>)+)#i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
      if ($texts === FALSE) {
        // The text could not be split; rather than risk highlighting inside
        // tags, leave it untouched.
        return $text;
      }
      // Because of PREG_SPLIT_DELIM_CAPTURE, even indexes hold text outside of
      // tags and odd indexes hold the tags themselves.
      for ($i = 0; $i < count($texts); $i += 2) {
        $texts[$i] = $this->highlightField($texts[$i], $keys, FALSE);
      }
      return implode('', $texts);
    }

    $quoted_keys = array();
    foreach ($keys as $key) {
      $quoted_keys[] = preg_quote($key, '/');
    }
    // If "Highlight partial matches" is disabled, we only want to highlight
    // matches that are complete words. Otherwise, we want all of them. Uses
    // the same late-bound boundary pattern as createExcerpt() so both agree.
    $boundary = empty($this->options['highlight_partial']) ? static::$boundary : '';
    $regex = '/' . $boundary . '(?:' . implode('|', $quoted_keys) . ')' . $boundary . '/iu';
    $replace = $this->options['prefix'] . '\0' . $this->options['suffix'];
    // Pad with spaces so keywords at the very start or end of the text are
    // still surrounded by a word boundary.
    $highlighted = preg_replace($regex, $replace, ' ' . $text . ' ');
    if ($highlighted === NULL) {
      // preg_replace() failed (e.g., malformed UTF-8 with the "u" modifier);
      // keep the original text instead of returning an empty string.
      return $text;
    }
    return substr($highlighted, 1, -1);
  }

  /**
   * Flattens a (possibly multidimensional) array into a string.
   *
   * @param array $array
   *   The array to flatten.
   * @param string $glue
   *   (optional) The separator to insert between individual array items.
   *
   * @return string
   *   The glued string.
   */
  protected function flattenArrayValues(array $array, $glue = " \n\n ") {
    $ret = array();
    foreach ($array as $item) {
      if (is_array($item)) {
        $ret[] = $this->flattenArrayValues($item, $glue);
      }
      else {
        $ret[] = $item;
      }
    }

    return implode($glue, $ret);
  }

}