array(), 'negative' => array());

  /**
   * Indicates whether the first pass query requires complex conditions (LIKE).
   *
   * Starts out TRUE. parseSearchExpression() switches it to FALSE for quoted
   * phrases, negative keywords, keywords that contain no valid word, and
   * expressions that mix ANDed and ORed terms. When it is FALSE,
   * executeFirstPass() adds the LIKE conditions to the first pass query.
   *
   * @var bool
   */
  protected $simple = TRUE;

  /**
   * Conditions that are used for exact searches.
   *
   * This is always used for the second pass query but not for the first pass,
   * unless $this->simple is FALSE. It is only populated once
   * parseSearchExpression() has found at least one keyword.
   *
   * @var DatabaseCondition
   */
  protected $conditions;

  /**
   * Indicates how many matches for a search query are necessary.
   *
   * The first pass query compares this against COUNT(*) in its HAVING clause.
   *
   * @var int
   */
  protected $matches = 0;

  /**
   * Array of search words, keyed by the word itself.
   *
   * These words have to match against {search_index}.word.
   *
   * @var array
   */
  protected $words = array();

  /**
   * Normalization factor for the keyword relevance score.
   *
   * The first pass query stores the highest SUM(i.score * t.count) it finds
   * here. execute() scales the keyword relevance by the reciprocal of this
   * value, with the intent that the resulting calculated score is between 0
   * and 1.
   *
   * @var float
   */
  protected $normalize;

  /**
   * Indicates whether the first pass query has been executed.
   *
   * @var bool
   */
  protected $executedFirstPass = FALSE;

  /**
   * Stores score expressions.
   *
   * @var array
   *
   * @see addScore()
   */
  protected $scores = array();

  /**
   * Stores arguments for score expressions.
   *
   * @var array
   *
   * @see addScore()
   */
  protected $scoresArguments = array();

  /**
   * Stores multipliers for score expressions.
   *
   * @var array
   *
   * @see addScore()
   */
  protected $multiply = array();

  /**
   * Whether or not search expressions were ignored.
   *
   * The maximum number of AND/OR combinations can be configured (variable
   * 'search_and_or_limit') to avoid Denial-of-Service attacks. Expressions
   * beyond the limit are ignored.
   *
   * @var bool
   */
  protected $expressionsIgnored = FALSE;

  /**
   * Sets up the search query expression.
   *
   * @param $expression
   *   A search query string, which can contain options.
   * @param $module
   *   The search module. This maps to {search_index}.type in the database.
   *
   * @return
   *   The SearchQuery object.
*/
  public function searchExpression($expression, $module) {
    $this->searchExpression = $expression;
    $this->type = $module;

    return $this;
  }

  /**
   * Applies a search option and removes it from the search query string.
   *
   * These options are in the form option:value,value2,value3. The values are
   * ORed together as conditions on the given column.
   *
   * @param $option
   *   Name of the option.
   * @param $column
   *   Name of the database column to which the value should be applied.
   *
   * @return
   *   TRUE if a value for that option was found, FALSE if not.
   */
  public function setOption($option, $column) {
    $values = search_expression_extract($this->searchExpression, $option);
    if (!$values) {
      return FALSE;
    }

    $or = db_or();
    foreach (explode(',', $values) as $value) {
      $or->condition($column, $value);
    }
    $this->condition($or);

    // Calling the insert helper without a value strips the option from the
    // expression, so it is not treated as a keyword later on.
    $this->searchExpression = search_expression_insert($this->searchExpression, $option);

    return TRUE;
  }

  /**
   * Parses the search query into SQL conditions.
   *
   * We build two queries that match the dataset bodies. This method fills
   * $this->keys, $this->words, $this->matches and $this->conditions, and
   * clears $this->simple whenever the expression needs LIKE matching already
   * in the first pass.
   */
  protected function parseSearchExpression() {
    // Matches words optionally prefixed by a dash. A word in this case is
    // something between two spaces, optionally quoted.
    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);

    if (count($keywords) == 0) {
      return;
    }

    // Classify tokens.
    $or = FALSE;
    $warning = '';
    $limit_combinations = variable_get('search_and_or_limit', 7);
    // The first search expression does not count as AND.
    $and_count = -1;
    $or_count = 0;
    foreach ($keywords as $match) {
      if ($or_count && $and_count + $or_count >= $limit_combinations) {
        // Ignore all further search expressions to prevent Denial-of-Service
        // attacks using a high number of AND/OR combinations.
        $this->expressionsIgnored = TRUE;
        break;
      }

      $phrase = FALSE;
      // Strip off phrase quotes. Square brackets are used for the string
      // offset: the curly brace form is a parse error as of PHP 8.
      if ($match[2][0] == '"') {
        $match[2] = substr($match[2], 1, -1);
        $phrase = TRUE;
        $this->simple = FALSE;
      }

      // Simplify keyword according to indexing rules and external
      // preprocessors. Use same process as during search indexing, so it
      // will match search index.
      $words = search_simplify($match[2]);
      // Re-explode in case simplification added more words, except when
      // matching a phrase.
      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);

      // Negative matches.
      if ($match[1] == '-') {
        $this->keys['negative'] = array_merge($this->keys['negative'], $words);
      }
      // OR operator: instead of a single keyword, we store an array of all
      // OR'd keywords.
      elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
        $last = array_pop($this->keys['positive']);
        // Starting a new OR?
        if (!is_array($last)) {
          $last = array($last);
        }
        $this->keys['positive'][] = $last;
        $or = TRUE;
        $or_count++;
        continue;
      }
      // AND operator: implied, so just ignore it.
      elseif ($match[2] == 'AND' || $match[2] == 'and') {
        $warning = $match[2];
        continue;
      }
      // Plain keyword.
      else {
        if ($match[2] == 'or') {
          $warning = $match[2];
        }
        if ($or) {
          // Add to last element (which is an array).
          $last_index = count($this->keys['positive']) - 1;
          $this->keys['positive'][$last_index] = array_merge($this->keys['positive'][$last_index], $words);
        }
        else {
          $this->keys['positive'] = array_merge($this->keys['positive'], $words);
          $and_count++;
        }
      }
      $or = FALSE;
    }

    // Convert keywords into SQL statements.
    $this->conditions = db_and();
    $simple_and = FALSE;
    $simple_or = FALSE;
    // Positive matches.
    foreach ($this->keys['positive'] as $key) {
      // Group of ORed terms.
      if (is_array($key) && count($key)) {
        $simple_or = TRUE;
        $any = FALSE;
        $queryor = db_or();
        foreach ($key as $or_word) {
          list($num_new_scores) = $this->parseWord($or_word);
          $any = $any || $num_new_scores > 0;
          $queryor->condition('d.data', "% $or_word %", 'LIKE');
        }
        if (count($queryor)) {
          $this->conditions->condition($queryor);
          // A group of OR keywords only needs to match once.
          if ($any) {
            $this->matches++;
          }
        }
      }
      // Single ANDed term.
      else {
        $simple_and = TRUE;
        list($num_new_scores, $num_valid_words) = $this->parseWord($key);
        $this->conditions->condition('d.data', "% $key %", 'LIKE');
        if (!$num_valid_words) {
          $this->simple = FALSE;
        }
        // Each AND keyword needs to match at least once.
        $this->matches += $num_new_scores;
      }
    }
    if ($simple_and && $simple_or) {
      $this->simple = FALSE;
    }

    // Negative matches.
    foreach ($this->keys['negative'] as $key) {
      $this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
      $this->simple = FALSE;
    }

    if ($warning == 'or') {
      drupal_set_message(t('Search for either of the two terms with uppercase OR. For example, cats OR dogs.'));
    }
  }

  /**
   * Helper function for parseSearchExpression().
   *
   * Registers the scorable words of a keyword or phrase in $this->words. A
   * word is scorable when it is numeric or at least 'minimum_word_size'
   * characters long.
   *
   * @param $word
   *   A simplified keyword, or a phrase of space-separated words.
   *
   * @return
   *   An array with two integers: the number of words newly added to
   *   $this->words, and the number of valid (scorable) words in $word.
   */
  protected function parseWord($word) {
    $num_new_scores = 0;
    $num_valid_words = 0;
    // Loop-invariant, so look it up only once.
    $minimum_word_size = variable_get('minimum_word_size', 3);

    // Determine the scorewords of this word/phrase.
    foreach (explode(' ', $word) as $s) {
      if (is_numeric($s) || drupal_strlen($s) >= $minimum_word_size) {
        if (!isset($this->words[$s])) {
          $this->words[$s] = $s;
          $num_new_scores++;
        }
        $num_valid_words++;
      }
    }

    // Return the number of newly added words and the number of valid words.
    return array($num_new_scores, $num_valid_words);
  }

  /**
   * Executes the first pass query.
   *
   * This can either be done explicitly, so that additional scores and
   * conditions can be applied to the second pass query, or implicitly by
   * addScore() or execute().
   *
   * @return
   *   TRUE if search items exist, FALSE if not.
   */
  public function executeFirstPass() {
    $this->parseSearchExpression();

    if (count($this->words) == 0) {
      form_set_error('keys', format_plural(variable_get('minimum_word_size', 3), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.'));
      return FALSE;
    }
    if ($this->expressionsIgnored) {
      drupal_set_message(t('Your search used too many AND/OR expressions. Only the first @count terms were included in this search.', array('@count' => variable_get('search_and_or_limit', 7))), 'warning');
    }
    $this->executedFirstPass = TRUE;

    // $this->words is known to be non-empty here: match any of the words.
    $or = db_or();
    foreach ($this->words as $word) {
      $or->condition('i.word', $word);
    }
    $this->condition($or);

    // Build query for keyword normalization.
    $this->join('search_total', 't', 'i.word = t.word');
    $this
      ->condition('i.type', $this->type)
      ->groupBy('i.type')
      ->groupBy('i.sid')
      ->having('COUNT(*) >= :matches', array(':matches' => $this->matches));

    // Clone the query object to do the firstPass query;
    $first = clone $this->query;

    // For complex search queries, add the LIKE conditions to the first pass
    // query.
    if (!$this->simple) {
      $first->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
      $first->condition($this->conditions);
    }

    // Calculate maximum keyword relevance, to normalize it.
    $first->addExpression('SUM(i.score * t.count)', 'calculated_score');
    $this->normalize = $first
      ->range(0, 1)
      ->orderBy('calculated_score', 'DESC')
      ->execute()
      ->fetchField();

    return $this->normalize ? TRUE : FALSE;
  }

  /**
   * Adds a custom score expression to the search query.
   *
   * Score expressions are used to order search results. If no calls to
   * addScore() have taken place, a default keyword relevance score will be
   * used. However, if at least one call to addScore() has taken place, the
   * keyword relevance score is not automatically added.
   *
   * Also note that if you call orderBy() directly on the query, search scores
   * will not automatically be used to order search results. Your orderBy()
   * expression can reference 'calculated_score', which will be the total
   * calculated score value.
   *
   * @param $score
   *   The score expression, which should evaluate to a number between 0 and 1.
   *   The string 'i.relevance' in a score expression will be replaced by a
   *   measure of keyword relevance between 0 and 1.
   * @param $arguments
   *   Query arguments needed to provide values to the score expression.
   * @param $multiply
   *   If set, the score is multiplied with this value. However, all scores
   *   with multipliers are then divided by the total of all multipliers, so
   *   that overall, the normalization is maintained.
   *
   * @return object
   *   The updated query object.
   */
  public function addScore($score, $arguments = array(), $multiply = FALSE) {
    if ($multiply) {
      $i = count($this->multiply);
      // Modify the score expression so it is multiplied by the multiplier,
      // with a divisor to renormalize.
      $score = "CAST(:multiply_$i AS DECIMAL) * COALESCE(( " . $score . "), 0) / CAST(:total_$i AS DECIMAL)";
      // Add an argument for the multiplier. The :total_$i argument is taken
      // care of in the execute() method, which is when the total divisor is
      // calculated.
      $arguments[':multiply_' . $i] = $multiply;
      $this->multiply[] = $multiply;
    }

    $this->scores[] = $score;
    $this->scoresArguments += $arguments;

    return $this;
  }

  /**
   * Executes the search.
   *
   * If not already done, this executes the first pass query. Then the complex
   * conditions are applied to the query including score expressions and
   * ordering.
   *
   * @return
   *   An empty statement object (DatabaseStatementEmpty) if the first pass
   *   query returned no results, and a database result set if there were
   *   results.
   */
  public function execute() {
    if (!$this->executedFirstPass) {
      $this->executeFirstPass();
    }
    if (!$this->normalize) {
      return new DatabaseStatementEmpty();
    }

    // Add conditions to query.
    $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
    $this->condition($this->conditions);

    if (empty($this->scores)) {
      // Add default score.
      $this->addScore('i.relevance');
    }

    if (count($this->multiply)) {
      // Re-normalize scores with multipliers by dividing by the total of all
      // multipliers. The expressions were altered in addScore(), so here just
      // add the arguments for the total. The keys of $this->multiply are the
      // same indexes addScore() used for its placeholders.
      $sum = array_sum($this->multiply);
      foreach (array_keys($this->multiply) as $i) {
        $this->scoresArguments[':total_' . $i] = $sum;
      }
    }

    // Replace the pseudo-expression 'i.relevance' with a measure of keyword
    // relevance in all score expressions, using string replacement. Careful
    // though! If you just print out a float, some locales use ',' as the
    // decimal separator in PHP, while SQL always uses '.'. So, make sure to
    // set the number format correctly.
    $relevance = number_format((1.0 / $this->normalize), 10, '.', '');
    $this->scores = str_replace('i.relevance', '(' . $relevance . ' * i.score * t.count)', $this->scores);

    // Add all scores together to form a query field.
    $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments);

    // If an order has not yet been set for this query, add a default order
    // that sorts by the calculated sum of scores.
    if (count($this->getOrderBy()) == 0) {
      $this->orderBy('calculated_score', 'DESC');
    }

    // Add tag and useful metadata.
    $this
      ->addTag('search_' . $this->type)
      ->addMetaData('normalize', $this->normalize)
      ->fields('i', array('type', 'sid'));

    return $this->query->execute();
  }

  /**
   * Builds the default count query for SearchQuery.
   *
   * Since SearchQuery always uses GROUP BY, we can default to a subquery. We
   * also add the same conditions as execute() because countQuery() is called
   * first.
   *
   * @return
   *   A select query that counts the rows of the grouped inner query.
   */
  public function countQuery() {
    // Clone the inner query.
    $inner = clone $this->query;

    // Add conditions to query.
    $inner->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
    $inner->condition($this->conditions);

    // Remove existing fields and expressions, they are not needed for a count
    // query. Both getters return references, so assigning clears them in
    // place on the cloned query.
    $fields =& $inner->getFields();
    $fields = array();
    $expressions =& $inner->getExpressions();
    $expressions = array();

    // Add the sid as the only field and count them as a subquery.
    $count = db_select($inner->fields('i', array('sid')), NULL, array('target' => 'slave'));

    // Add the COUNT() expression.
    $count->addExpression('COUNT(*)');

    return $count;
  }

}