service.inc 41 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227
  1. <?php
  2. /**
  3. * Search service class using the database for storing index information.
  4. */
  5. class SearchApiDbService extends SearchApiAbstractService {
  6. protected $previous_db;
  7. protected $query_options = array();
  8. protected $ignored = array();
  9. protected $warnings = array();
  10. public function configurationForm(array $form, array &$form_state) {
  11. if (empty($this->options)) {
  12. global $databases;
  13. foreach ($databases as $key => $targets) {
  14. foreach ($targets as $target => $info) {
  15. $options[$key]["$key:$target"] = "$key > $target";
  16. }
  17. }
  18. if (count($options) > 1 || count(reset($options)) > 1) {
  19. $form['database'] = array(
  20. '#type' => 'select',
  21. '#title' => t('Database'),
  22. '#description' => t('Select the database key and target to use for storing indexing information in. ' .
  23. 'Cannot be changed after creation.'),
  24. '#options' => $options,
  25. '#default_value' => 'default:default',
  26. '#required' => TRUE,
  27. );
  28. }
  29. else {
  30. $form['database'] = array(
  31. '#type' => 'value',
  32. '#value' => "$key:$target",
  33. );
  34. }
  35. $form['min_chars'] = array(
  36. '#type' => 'select',
  37. '#title' => t('Minimum word length'),
  38. '#description' => t('The minimum number of characters a word must consist of to be indexed.'),
  39. '#options' => drupal_map_assoc(array(1, 2, 3, 4, 5, 6)),
  40. '#default_value' => 1,
  41. );
  42. }
  43. else {
  44. $form = array(
  45. 'database' => array(
  46. '#type' => 'value',
  47. '#title' => t('Database'), // Slight hack for the "View server" page.
  48. '#value' => $this->options['database'],
  49. ),
  50. 'database_text' => array(
  51. '#type' => 'item',
  52. '#title' => t('Database'),
  53. '#markup' => check_plain(str_replace(':', ' > ', $this->options['database'])),
  54. ),
  55. 'min_chars' => array(
  56. '#type' => 'select',
  57. '#title' => t('Minimum word length'),
  58. '#description' => t('The minimum number of characters a word must consist of to be indexed.'),
  59. '#options' => drupal_map_assoc(array(1, 2, 3, 4, 5, 6)),
  60. '#default_value' => $this->options['min_chars'],
  61. ),
  62. );
  63. }
  64. return $form;
  65. }
  66. public function supportsFeature($feature) {
  67. return $feature == 'search_api_facets';
  68. }
  69. public function postUpdate() {
  70. return $this->server->options != $this->server->original->options;
  71. }
  72. public function preDelete() {
  73. // Only react on real deletes, not on reverts.
  74. if ($this->server->hasStatus(ENTITY_IN_CODE)) {
  75. return;
  76. }
  77. if (empty($this->options['indexes'])) {
  78. return;
  79. }
  80. foreach ($this->options['indexes'] as $index) {
  81. foreach ($index as $field) {
  82. db_drop_table($field['table']);
  83. }
  84. }
  85. }
  86. public function addIndex(SearchApiIndex $index) {
  87. $this->options += array('indexes' => array());
  88. $indexes = &$this->options['indexes'];
  89. if (isset($indexes[$index->machine_name])) {
  90. // Easiest and safest method to ensure all of the index' data is properly re-added.
  91. $this->removeIndex($index);
  92. }
  93. if (empty($index->options['fields']) || !is_array($index->options['fields'])) {
  94. // No fields, no work.
  95. $indexes[$index->machine_name] = array();
  96. $this->server->save();
  97. return $this;
  98. }
  99. $prefix = 'search_api_db_' . $index->machine_name. '_';
  100. $indexes[$index->machine_name] = array();
  101. foreach ($index->getFields() as $name => $field) {
  102. $table = $this->findFreeTable($prefix, $name);
  103. $this->createFieldTable($index, $field, $table);
  104. $indexes[$index->machine_name][$name]['table'] = $table;
  105. $indexes[$index->machine_name][$name]['type'] = $field['type'];
  106. $indexes[$index->machine_name][$name]['boost'] = $field['boost'];
  107. }
  108. $this->server->save();
  109. }
  110. /**
  111. * Helper method for finding free table names for fields.
  112. *
  113. * MySQL 5.0 imposes a 64 characters length limit for table names, PostgreSQL
  114. * 8.3 only allows 63 characters. Therefore, always return a name at most 63
  115. * characters long.
  116. */
  117. protected function findFreeTable($prefix, $name) {
  118. // A DB prefix might further reduce the maximum length of the table name.
  119. $maxlen = 63;
  120. list($key, $target) = explode(':', $this->options['database'], 2);
  121. if ($db_prefix = Database::getConnection($target, $key)->tablePrefix()) {
  122. $maxlen -= drupal_strlen($db_prefix);
  123. }
  124. $base = $table = drupal_substr($prefix . drupal_strtolower(preg_replace('/[^a-z0-9]/i', '_', $name)), 0, $maxlen);
  125. $i = 0;
  126. while (db_table_exists($table)) {
  127. $suffix = '_' . ++$i;
  128. $table = drupal_substr($base, 0, $maxlen - drupal_strlen($suffix)) . $suffix;
  129. }
  130. return $table;
  131. }
  132. /**
  133. * Helper method for creating the table for a field.
  134. */
  135. protected function createFieldTable(SearchApiIndex $index, $field, $name) {
  136. $table = array(
  137. 'name' => $name,
  138. 'module' => 'search_api_db',
  139. 'fields' => array(
  140. 'item_id' => array(
  141. 'description' => 'The primary identifier of the item.',
  142. 'not null' => TRUE,
  143. ),
  144. ),
  145. );
  146. // The type of the item_id field depends on the ID field's type.
  147. $id_field = $index->datasource()->getIdFieldInfo();
  148. $table['fields']['item_id'] += $this->sqlType($id_field['type'] == 'text' ? 'string' : $id_field['type']);
  149. if (isset($table['fields']['item_id']['length'])) {
  150. // A length of 255 is overkill for IDs. 50 should be more than enough.
  151. $table['fields']['item_id']['length'] = 50;
  152. }
  153. $type = search_api_extract_inner_type($field['type']);
  154. if ($type == 'text') {
  155. $table['fields']['word'] = array(
  156. 'description' => 'The text of the indexed token.',
  157. 'type' => 'varchar',
  158. 'length' => 50,
  159. 'not null' => TRUE,
  160. );
  161. $table['fields']['score'] = array(
  162. 'description' => 'The score associated with this token.',
  163. 'type' => 'float',
  164. 'not null' => TRUE,
  165. );
  166. $table['primary key'] = array('item_id', 'word');
  167. $table['indexes']['word'] = array(array('word', 10));
  168. }
  169. else {
  170. $table['fields']['value'] = $this->sqlType($type);
  171. $table['fields']['value'] += array('description' => "The field's value for this item.");
  172. if ($type != $field['type']) {
  173. // This is a list type.
  174. $table['fields']['value']['not null'] = TRUE;
  175. $table['primary key'] = array('item_id', 'value');
  176. }
  177. else {
  178. $table['primary key'] = array('item_id');
  179. }
  180. $table['indexes']['value'] = $table['fields']['value'] == 'varchar' ? array(array('value', 10)) : array('value');
  181. }
  182. $set = $this->setDb();
  183. db_create_table($name, $table);
  184. //Some DBMSs will need a character encoding and collation set.
  185. global $databases;
  186. list($key, $target) = explode(':', $this->options['database'], 2);
  187. $db_driver = $databases[$key][$target]['driver'];
  188. switch ($db_driver) {
  189. case 'mysql':
  190. db_query("ALTER TABLE {{$name}} CONVERT TO CHARACTER SET 'utf8' COLLATE 'utf8_bin'")->execute();
  191. break;
  192. // @todo Add fixes for other DBMSs.
  193. case 'oracle':
  194. case 'pgsql':
  195. case 'sqlite':
  196. case 'sqlsrv':
  197. break;
  198. }
  199. if ($set) {
  200. $this->resetDb();
  201. }
  202. }
  203. protected function sqlType($type) {
  204. $type = search_api_extract_inner_type($type);
  205. switch ($type) {
  206. case 'string':
  207. case 'uri':
  208. return array('type' => 'varchar', 'length' => 255);
  209. case 'integer':
  210. case 'duration':
  211. case 'date': // 'datetime' sucks. This way, we just convert every input into a timestamp.
  212. return array('type' => 'int');
  213. case 'decimal':
  214. return array('type' => 'float');
  215. case 'boolean':
  216. return array('type' => 'int', 'size' => 'tiny');
  217. default:
  218. throw new SearchApiException(t('Unknown field type @type. Database search module might be out of sync with Search API.', array('@type' => $type)));
  219. }
  220. }
  221. public function fieldsUpdated(SearchApiIndex $index) {
  222. $fields = &$this->options['indexes'][$index->machine_name];
  223. $new_fields = $index->getFields();
  224. $reindex = FALSE;
  225. $cleared = FALSE;
  226. $set = $this->setDb();
  227. foreach ($fields as $name => $field) {
  228. if (!isset($new_fields[$name])) {
  229. db_drop_table($field['table']);
  230. unset($fields[$name]);
  231. continue;
  232. }
  233. $old_type = $field['type'];
  234. $new_type = $new_fields[$name]['type'];
  235. $fields[$name]['type'] = $new_type;
  236. $fields[$name]['boost'] = $new_fields[$name]['boost'];
  237. $old_inner_type = search_api_extract_inner_type($old_type);
  238. $new_inner_type = search_api_extract_inner_type($new_type);
  239. if ($old_type != $new_type) {
  240. if ($old_inner_type == 'text' || $new_inner_type == 'text'
  241. || search_api_list_nesting_level($old_type) != search_api_list_nesting_level($new_type)) {
  242. // A change in fulltext or list status necessitates completely
  243. // clearing the index.
  244. $reindex = TRUE;
  245. if (!$cleared) {
  246. $cleared = TRUE;
  247. $this->deleteItems('all', $index);
  248. }
  249. db_drop_table($field['table']);
  250. $this->createFieldTable($index, $new_fields[$name], $field['table']);
  251. }
  252. elseif ($this->sqlType($old_inner_type) != $this->sqlType($new_inner_type)) {
  253. // There is a change in SQL type. We don't have to clear the index, since types can be converted.
  254. db_change_field($field['table'], 'value', 'value', $this->sqlType($new_type) + array('description' => "The field's value for this item."));
  255. $reindex = TRUE;
  256. }
  257. elseif ($old_inner_type == 'date' || $new_inner_type == 'date') {
  258. // Even though the SQL type stays the same, we have to reindex since conversion rules change.
  259. $reindex = TRUE;
  260. }
  261. }
  262. elseif (!$reindex && $new_inner_type == 'text' && $field['boost'] != $new_fields[$name]['boost']) {
  263. $multiplier = $new_fields[$name]['boost'] / $field['boost'];
  264. db_update($field['table'], $this->query_options)
  265. ->expression('score', 'score * :mult', array(':mult' => $multiplier))
  266. ->execute();
  267. }
  268. unset($new_fields[$name]);
  269. }
  270. $prefix = 'search_api_db_' . $index->machine_name. '_';
  271. // These are new fields that were previously not indexed.
  272. foreach ($new_fields as $name => $field) {
  273. $reindex = TRUE;
  274. $table = $this->findFreeTable($prefix, $name);
  275. $this->createFieldTable($index, $field, $table);
  276. $fields[$name]['table'] = $table;
  277. $fields[$name]['type'] = $field['type'];
  278. $fields[$name]['boost'] = $field['boost'];
  279. }
  280. if ($set) {
  281. $this->resetDb();
  282. }
  283. $this->server->save();
  284. return $reindex;
  285. }
  286. public function removeIndex($index) {
  287. $id = is_object($index) ? $index->machine_name : $index;
  288. if (!isset($this->options['indexes'][$id])) {
  289. return;
  290. }
  291. $set = $this->setDb();
  292. foreach ($this->options['indexes'][$id] as $field) {
  293. db_drop_table($field['table']);
  294. }
  295. if ($set) {
  296. $this->resetDb();
  297. }
  298. unset($this->options['indexes'][$id]);
  299. $this->server->save();
  300. }
  301. public function indexItems(SearchApiIndex $index, array $items) {
  302. if (empty($this->options['indexes'][$index->machine_name])) {
  303. throw new SearchApiException(t('No field settings for index with id @id.', array('@id' => $index->machine_name)));
  304. }
  305. $indexed = array();
  306. $set = $this->setDb();
  307. foreach ($items as $id => $item) {
  308. try {
  309. if ($this->indexItem($index, $id, $item)) {
  310. $indexed[] = $id;
  311. }
  312. }
  313. catch (Exception $e) {
  314. // We just log the error, hoping we can index the other items.
  315. watchdog('search_api_db', check_plain($e->getMessage()), NULL, WATCHDOG_WARNING);
  316. }
  317. }
  318. if ($set) {
  319. $this->resetDb();
  320. }
  321. return $indexed;
  322. }
  323. protected function indexItem(SearchApiIndex $index, $id, array $item) {
  324. $fields = $this->options['indexes'][$index->machine_name];
  325. $fields_updated = FALSE;
  326. $txn = db_transaction('search_api_indexing', $this->query_options);
  327. try {
  328. foreach ($item as $name => $field) {
  329. // Sometimes index changes are not triggering the update hooks
  330. // correctly. Therefore, to avoid DB errors, we re-check the tables
  331. // here before indexing.
  332. if (empty($fields[$name]['table']) && !$fields_updated) {
  333. unset($this->options['indexes'][$index->machine_name][$name]);
  334. $this->fieldsUpdated($index);
  335. $fields_updated = TRUE;
  336. $fields = $this->options['indexes'][$index->machine_name];
  337. }
  338. if (empty($fields[$name]['table'])) {
  339. watchdog('search_api_db', "Unknown field !field: please check (and re-save) the index's fields settings.",
  340. array('!field' => $name), WATCHDOG_WARNING);
  341. continue;
  342. }
  343. $table = $fields[$name]['table'];
  344. $boost = $fields[$name]['boost'];
  345. db_delete($table, $this->query_options)
  346. ->condition('item_id', $id)
  347. ->execute();
  348. // Don't index null values
  349. if($field['value'] === NULL) {
  350. continue;
  351. }
  352. $type = $field['type'];
  353. $value = $this->convert($field['value'], $type, $field['original_type'], $index);
  354. if (search_api_is_text_type($type, array('text', 'tokens'))) {
  355. $words = array();
  356. foreach ($value as $token) {
  357. // Taken from core search to reflect less importance of words later
  358. // in the text.
  359. // Focus is a decaying value in terms of the amount of unique words
  360. // up to this point. From 100 words and more, it decays, to e.g. 0.5
  361. // at 500 words and 0.3 at 1000 words.
  362. $focus = min(1, .01 + 3.5 / (2 + count($words) * .015));
  363. $value = &$token['value'];
  364. if (is_numeric($value)) {
  365. $value = ltrim($value, '-0');
  366. }
  367. elseif (drupal_strlen($value) < $this->options['min_chars']) {
  368. continue;
  369. }
  370. $value = drupal_strtolower($value);
  371. $token['score'] *= $focus;
  372. if (!isset($words[$value])) {
  373. $words[$value] = $token;
  374. }
  375. else {
  376. $words[$value]['score'] += $token['score'];
  377. }
  378. }
  379. if ($words) {
  380. $query = db_insert($table, $this->query_options)
  381. ->fields(array('item_id', 'word', 'score'));
  382. foreach ($words as $word) {
  383. $query->values(array(
  384. 'item_id' => $id,
  385. 'word' => $word['value'],
  386. 'score' => $word['score'] * $boost,
  387. ));
  388. }
  389. $query->execute();
  390. }
  391. }
  392. elseif (search_api_is_list_type($type)) {
  393. $values = array();
  394. if (is_array($value)) {
  395. foreach ($value as $v) {
  396. if ($v !== NULL) {
  397. $values[$v] = TRUE;
  398. }
  399. }
  400. $values = array_keys($values);
  401. }
  402. else {
  403. $values[] = $value;
  404. }
  405. if ($values) {
  406. $insert = db_insert($table, $this->query_options)
  407. ->fields(array('item_id', 'value'));
  408. foreach ($values as $v) {
  409. $insert->values(array(
  410. 'item_id' => $id,
  411. 'value' => $v,
  412. ));
  413. }
  414. $insert->execute();
  415. }
  416. }
  417. elseif (isset($value)) {
  418. db_insert($table, $this->query_options)
  419. ->fields(array(
  420. 'item_id' => $id,
  421. 'value' => $value,
  422. ))
  423. ->execute();
  424. }
  425. }
  426. }
  427. catch (Exception $e) {
  428. $txn->rollback();
  429. throw $e;
  430. }
  431. return TRUE;
  432. }
  433. protected function convert($value, $type, $original_type, SearchApiIndex $index) {
  434. if (search_api_is_list_type($type)) {
  435. $type = substr($type, 5, -1);
  436. $original_type = search_api_extract_inner_type($original_type);
  437. $ret = array();
  438. if (is_array($value)) {
  439. foreach ($value as $v) {
  440. $v = $this->convert($v, $type, $original_type, $index);
  441. // Don't add NULL values to the return array. Also, adding an empty
  442. // array is, of course, a waste of time.
  443. if (isset($v) && $v !== array()) {
  444. $ret = array_merge($ret, is_array($v) ? $v : array($v));
  445. }
  446. }
  447. }
  448. return $ret;
  449. }
  450. if (!isset($value)) {
  451. // For text fields, we have to return an array even if the value is NULL.
  452. return search_api_is_text_type($type, array('text', 'tokens')) ? array() : NULL;
  453. }
  454. switch ($type) {
  455. case 'text':
  456. $ret = array();
  457. foreach (preg_split('/[^\p{L}\p{N}]+/u', $value, -1, PREG_SPLIT_NO_EMPTY) as $v) {
  458. if ($v) {
  459. $ret[] = array(
  460. 'value' => $v,
  461. 'score' => 1.0,
  462. );
  463. }
  464. }
  465. $value = $ret;
  466. // FALL-THROUGH!
  467. case 'tokens':
  468. while (TRUE) {
  469. foreach ($value as $i => $v) {
  470. // Check for over-long tokens.
  471. $score = $v['score'];
  472. $v = $v['value'];
  473. if (drupal_strlen($v) > 50) {
  474. $words = preg_split('/[^\p{L}\p{N}]+/u', $v, -1, PREG_SPLIT_NO_EMPTY);
  475. if (count($words) > 1 && max(array_map('drupal_strlen', $words)) <= 50) {
  476. // Overlong token is due to bad tokenizing.
  477. // Check for "Tokenizer" preprocessor on index.
  478. if (empty($index->options['processors']['search_api_tokenizer']['status'])) {
  479. watchdog('search_api_db', 'An overlong word (more than 50 characters) was encountered while indexing, due to bad tokenizing. ' .
  480. 'It is recommended to enable the "Tokenizer" preprocessor for indexes using database servers. ' .
  481. 'Otherwise, the service class has to use its own, fixed tokenizing.', array(), WATCHDOG_WARNING);
  482. }
  483. else {
  484. watchdog('search_api_db', 'An overlong word (more than 50 characters) was encountered while indexing, due to bad tokenizing. ' .
  485. 'Please check your settings for the "Tokenizer" preprocessor to ensure that data is tokenized correctly.',
  486. array(), WATCHDOG_WARNING);
  487. }
  488. }
  489. $tokens = array();
  490. foreach ($words as $word) {
  491. if (drupal_strlen($word) > 50) {
  492. watchdog('search_api_db', 'An overlong word (more than 50 characters) was encountered while indexing: %word.<br />' .
  493. 'Database search servers currently cannot index such words correctly – the word was therefore trimmed to the allowed length.',
  494. array('%word' => $word), WATCHDOG_WARNING);
  495. $word = drupal_substr($word, 0, 50);
  496. }
  497. $tokens[] = array(
  498. 'value' => $word,
  499. 'score' => $score,
  500. );
  501. }
  502. array_splice($value, $i, 1, $tokens);
  503. continue 2;
  504. }
  505. }
  506. break;
  507. }
  508. return $value;
  509. case 'string':
  510. case 'uri':
  511. // For non-dates, PHP can handle this well enough
  512. if ($original_type == 'date') {
  513. return date('%c', $value);
  514. }
  515. if (drupal_strlen($value) > 255) {
  516. throw new SearchApiException(t('A string value longer than 255 characters was encountered. ' .
  517. "Such values currently aren't supported by the database backend."));
  518. }
  519. return $value;
  520. case 'integer':
  521. case 'duration':
  522. case 'decimal':
  523. return 0 + $value;
  524. case 'boolean':
  525. return $value ? 1 : 0;
  526. case 'date':
  527. if (is_numeric($value) || !$value) {
  528. return 0 + $value;
  529. }
  530. return strtotime($value);
  531. default:
  532. throw new SearchApiException(t('Unknown field type @type. Database search module might be out of sync with Search API.', array('@type' => $type)));
  533. }
  534. }
  535. public function deleteItems($ids = 'all', SearchApiIndex $index = NULL) {
  536. if (!$index) {
  537. if (empty($this->options['indexes'])) {
  538. return;
  539. }
  540. $set = $this->setDb();
  541. foreach ($this->options['indexes'] as $index) {
  542. foreach ($index as $fields) {
  543. foreach ($fields as $field) {
  544. db_truncate($field['table'], $this->query_options)->execute();
  545. }
  546. }
  547. }
  548. if ($set) {
  549. $this->resetDb();
  550. }
  551. return;
  552. }
  553. if (empty($this->options['indexes'][$index->machine_name])) {
  554. return;
  555. }
  556. $set = $this->setDb();
  557. foreach ($this->options['indexes'][$index->machine_name] as $field) {
  558. if (is_array($ids)) {
  559. db_delete($field['table'], $this->query_options)
  560. ->condition('item_id', $ids, 'IN')
  561. ->execute();
  562. }
  563. else {
  564. db_truncate($field['table'], $this->query_options)->execute();
  565. }
  566. }
  567. if ($set) {
  568. $this->resetDb();
  569. }
  570. }
  571. public function search(SearchApiQueryInterface $query) {
  572. $time_method_called = microtime(TRUE);
  573. $set = $this->setDb();
  574. $index = $query->getIndex();
  575. if (empty($this->options['indexes'][$index->machine_name])) {
  576. throw new SearchApiException(t('Unknown index @id.', array('@id' => $index->machine_name)));
  577. }
  578. $fields = $this->options['indexes'][$index->machine_name];
  579. $keys = &$query->getKeys();
  580. $keys_set = (boolean) $keys;
  581. $keys = $this->prepareKeys($keys);
  582. if ($keys && !(is_array($keys) && count($keys) == 1)) {
  583. $fulltext_fields = $query->getFields();
  584. if ($fulltext_fields) {
  585. $_fulltext_fields = $fulltext_fields;
  586. $fulltext_fields = array();
  587. foreach ($_fulltext_fields as $name) {
  588. if (!isset($fields[$name])) {
  589. throw new SearchApiException(t('Unknown field @field specified as search target.', array('@field' => $name)));
  590. }
  591. if (!search_api_is_text_type($fields[$name]['type'])) {
  592. throw new SearchApiException(t('Cannot perform fulltext search on field @field of type @type.', array('@field' => $name, '@type' => $fields[$name]['type'])));
  593. }
  594. $fulltext_fields[$name] = $fields[$name];
  595. }
  596. $db_query = $this->createKeysQuery($keys, $fulltext_fields, $fields);
  597. if (is_array($keys) && !empty($keys['#negation'])) {
  598. $db_query->addExpression(':score', 'score', array(':score' => 1));
  599. }
  600. }
  601. else {
  602. $msg = t('Search keys are given but no fulltext fields are defined.');
  603. watchdog('search_api_db', $msg, NULL, WATCHDOG_WARNING);
  604. $this->warnings[$msg] = 1;
  605. }
  606. }
  607. elseif ($keys_set) {
  608. $msg = t('No valid search keys were present in the query.');
  609. $this->warnings[$msg] = 1;
  610. }
  611. if (!isset($db_query)) {
  612. $db_query = db_select($fields['search_api_language']['table'], 't', $this->query_options);
  613. $db_query->addField('t', 'item_id', 'item_id');
  614. $db_query->addExpression(':score', 'score', array(':score' => 1));
  615. }
  616. $filter = $query->getFilter();
  617. if ($filter->getFilters()) {
  618. $condition = $this->createFilterCondition($filter, $fields, $db_query);
  619. if ($condition) {
  620. $db_query->condition($condition);
  621. }
  622. }
  623. $db_query->addTag('search_api_db_search');
  624. $time_processing_done = microtime(TRUE);
  625. $results = array();
  626. $count_query = $db_query->countQuery();
  627. $results['result count'] = $count_query->execute()->fetchField();
  628. if ($results['result count']) {
  629. if ($query->getOption('search_api_facets')) {
  630. $results['search_api_facets'] = $this->getFacets($query, clone $db_query);
  631. }
  632. $query_options = $query->getOptions();
  633. if (isset($query_options['offset']) || isset($query_options['limit'])) {
  634. $offset = isset($query_options['offset']) ? $query_options['offset'] : 0;
  635. $limit = isset($query_options['limit']) ? $query_options['limit'] : 1000000;
  636. $db_query->range($offset, $limit);
  637. }
  638. $sort = $query->getSort();
  639. if ($sort) {
  640. foreach ($sort as $field_name => $order) {
  641. if ($order != 'ASC' && $order != 'DESC') {
  642. $msg = t('Unknown sort order @order. Assuming "ASC".', array('@order' => $order));
  643. $this->warnings[$msg] = $msg;
  644. $order = 'ASC';
  645. }
  646. if ($field_name == 'search_api_relevance') {
  647. $db_query->orderBy('score', $order);
  648. continue;
  649. }
  650. if ($field_name == 'search_api_id') {
  651. $db_query->orderBy('item_id', $order);
  652. continue;
  653. }
  654. if (!isset($fields[$field_name])) {
  655. throw new SearchApiException(t('Trying to sort on unknown field @field.', array('@field' => $field_name)));
  656. }
  657. $field = $fields[$field_name];
  658. if (search_api_is_list_type($field['type'])) {
  659. throw new SearchApiException(t('Cannot sort on field @field of a list type.', array('@field' => $field_name)));
  660. }
  661. if (search_api_is_text_type($field['type'])) {
  662. throw new SearchApiException(t('Cannot sort on fulltext field @field.', array('@field' => $field_name)));
  663. }
  664. $alias = $this->getTableAlias($field, $db_query);
  665. $db_query->orderBy($alias . '.value', $order);
  666. }
  667. }
  668. else {
  669. $db_query->orderBy('score', 'DESC');
  670. }
  671. $result = $db_query->execute();
  672. $time_queries_done = microtime(TRUE);
  673. foreach ($result as $row) {
  674. $results['results'][$row->item_id] = array(
  675. 'id' => $row->item_id,
  676. 'score' => $row->score,
  677. );
  678. }
  679. }
  680. else {
  681. $time_queries_done = microtime(TRUE);
  682. $results['results'] = array();
  683. }
  684. $results['warnings'] = array_keys($this->warnings);
  685. $results['ignored'] = array_keys($this->ignored);
  686. if ($set) {
  687. $this->resetDb();
  688. }
  689. $time_end = microtime(TRUE);
  690. $results['performance'] = array(
  691. 'complete' => $time_end - $time_method_called,
  692. 'preprocessing' => $time_processing_done - $time_method_called,
  693. 'execution' => $time_queries_done - $time_processing_done,
  694. 'postprocessing' => $time_end - $time_queries_done,
  695. );
  696. return $results;
  697. }
  698. /**
  699. * Helper method for removing unnecessary nested expressions from keys.
  700. */
  701. protected function prepareKeys($keys) {
  702. if (is_scalar($keys)) {
  703. $keys = $this->splitKeys($keys);
  704. return is_array($keys) ? $this->eliminateDuplicates($keys) : $keys;
  705. }
  706. elseif (!$keys) {
  707. return NULL;
  708. }
  709. $keys = $this->eliminateDuplicates($this->splitKeys($keys));
  710. $conj = $keys['#conjunction'];
  711. $neg = !empty($keys['#negation']);
  712. foreach ($keys as $i => &$nested) {
  713. if (is_array($nested)) {
  714. $nested = $this->prepareKeys($nested);
  715. if ($neg == !empty($nested['#negation'])) {
  716. if ($nested['#conjunction'] == $conj) {
  717. unset($nested['#conjunction'], $nested['#negation']);
  718. foreach ($nested as $renested) {
  719. $keys[] = $renested;
  720. }
  721. unset($keys[$i]);
  722. }
  723. }
  724. }
  725. }
  726. $keys = array_filter($keys);
  727. if (($count = count($keys)) <= 2) {
  728. if ($count < 2 || isset($keys['#negation'])) {
  729. $keys = NULL;
  730. }
  731. else {
  732. unset($keys['#conjunction']);
  733. $keys = array_shift($keys);
  734. }
  735. }
  736. return $keys;
  737. }
  738. /**
  739. * Helper method for splitting keys.
  740. */
  741. protected function splitKeys($keys) {
  742. if (is_scalar($keys)) {
  743. $proc = drupal_strtolower(trim($keys));
  744. if (is_numeric($proc)) {
  745. return ltrim($proc, '-0');
  746. }
  747. elseif (drupal_strlen($proc) < $this->options['min_chars']) {
  748. $this->ignored[$keys] = 1;
  749. return NULL;
  750. }
  751. $words = preg_split('/[^\p{L}\p{N}]+/u', $proc, -1, PREG_SPLIT_NO_EMPTY);
  752. if (count($words) > 1) {
  753. $proc = $this->splitKeys($words);
  754. $proc['#conjunction'] = 'AND';
  755. }
  756. return $proc;
  757. }
  758. foreach ($keys as $i => $key) {
  759. if (element_child($i)) {
  760. $keys[$i] = $this->splitKeys($key);
  761. }
  762. }
  763. return array_filter($keys);
  764. }
  765. /**
  766. * Helper method for eliminating duplicates from the search keys.
  767. */
  768. protected function eliminateDuplicates($keys, &$words = array()) {
  769. foreach ($keys as $i => $word) {
  770. if (!element_child($i)) {
  771. continue;
  772. }
  773. if (is_scalar($word)) {
  774. if (isset($words[$word])) {
  775. unset($keys[$i]);
  776. }
  777. else {
  778. $words[$word] = TRUE;
  779. }
  780. }
  781. else {
  782. $keys[$i] = $this->eliminateDuplicates($word, $words);
  783. }
  784. }
  785. return $keys;
  786. }
  787. /**
  788. * Helper method for creating a SELECT query for given search keys.
  789. *
  790. * @return SelectQueryInterface
  791. * A SELECT query returning item_id and score (or only item_id, if
  792. * $keys['#negation'] is set).
  793. */
  794. protected function createKeysQuery($keys, array $fields, array $all_fields) {
  795. if (!is_array($keys)) {
  796. $keys = array(
  797. '#conjunction' => 'AND',
  798. $keys,
  799. );
  800. }
  801. $or = db_or();
  802. $neg = !empty($keys['#negation']);
  803. $conj = $keys['#conjunction'];
  804. $words = array();
  805. $nested = array();
  806. $negated = array();
  807. $db_query = NULL;
  808. $mul_words = FALSE;
  809. $not_nested = FALSE;
  810. foreach ($keys as $i => $key) {
  811. if (!element_child($i)) {
  812. continue;
  813. }
  814. if (is_scalar($key)) {
  815. $words[] = $key;
  816. }
  817. elseif (empty($key['#negation'])) {
  818. if ($neg) {
  819. // If this query is negated, we also only need item_ids from
  820. // subqueries.
  821. $key['#negation'] = TRUE;
  822. }
  823. $nested[] = $key;
  824. }
  825. else {
  826. $negated[] = $key;
  827. }
  828. }
  829. $subs = count($words) + count($nested);
  830. $not_nested = ($subs <= 1 && count($fields) == 1) || ($neg && $conj == 'OR' && !$negated);
  831. if ($words) {
  832. if (count($words) > 1) {
  833. $mul_words = TRUE;
  834. foreach ($words as $word) {
  835. $or->condition('word', $word);
  836. }
  837. }
  838. else {
  839. $word = array_shift($words);
  840. }
  841. foreach ($fields as $name => $field) {
  842. $table = $field['table'];
  843. $query = db_select($table, 't', $this->query_options);
  844. if ($neg) {
  845. $query->fields('t', array('item_id'));
  846. }
  847. elseif ($not_nested) {
  848. $query->fields('t', array('item_id', 'score'));
  849. }
  850. else {
  851. $query->fields('t');
  852. }
  853. if ($mul_words) {
  854. $query->condition($or);
  855. }
  856. else {
  857. $query->condition('word', $word);
  858. }
  859. if (!isset($db_query)) {
  860. $db_query = $query;
  861. }
  862. elseif ($not_nested) {
  863. $db_query->union($query, 'UNION');
  864. }
  865. else {
  866. $db_query->union($query, 'UNION ALL');
  867. }
  868. }
  869. }
  870. if ($nested) {
  871. $word = '';
  872. foreach ($nested as $k) {
  873. $query = $this->createKeysQuery($k, $fields, $all_fields);
  874. if (!$neg) {
  875. $word .= ' ';
  876. $var = ':word' . strlen($word);
  877. $query->addExpression($var, 'word', array($var => $word));
  878. }
  879. if (!isset($db_query)) {
  880. $db_query = $query;
  881. }
  882. elseif ($not_nested) {
  883. $db_query->union($query, 'UNION');
  884. }
  885. else {
  886. $db_query->union($query, 'UNION ALL');
  887. }
  888. }
  889. }
  890. if (isset($db_query) && !$not_nested) {
  891. $db_query = db_select($db_query, 't', $this->query_options);
  892. $db_query->addField('t', 'item_id', 'item_id');
  893. if (!$neg) {
  894. $db_query->addExpression('SUM(t.score)', 'score');
  895. $db_query->groupBy('t.item_id');
  896. }
  897. if ($conj == 'AND' && $subs > 1) {
  898. $var = ':subs' . ((int) $subs);
  899. if (!$db_query->getGroupBy()) {
  900. $db_query->groupBy('t.item_id');
  901. }
  902. if ($mul_words) {
  903. $db_query->having('COUNT(DISTINCT t.word) >= ' . $var, array($var => $subs));
  904. }
  905. else {
  906. $db_query->having('COUNT(DISTINCT t.word) >= ' . $var, array($var => $subs));
  907. }
  908. }
  909. }
  910. if ($negated) {
  911. if (!isset($db_query) || $conj == 'OR') {
  912. if (isset($all_fields['search_api_language'])) {
  913. // We use this table because all items should be contained exactly once.
  914. $table = $all_fields['search_api_language']['table'];
  915. }
  916. else {
  917. $distinct = TRUE;
  918. foreach ($all_fields as $field) {
  919. $table = $field['table'];
  920. if (!search_api_is_list_type($field['type']) && !search_api_is_text_type($field['type'])) {
  921. unset($distinct);
  922. break;
  923. }
  924. }
  925. }
  926. if (isset($db_query)) {
  927. // We are in a rather bizarre case where the keys are something like "a OR (NOT b)".
  928. $old_query = $db_query;
  929. }
  930. $db_query = db_select($table, 't', $this->query_options);
  931. $db_query->addField('t', 'item_id', 'item_id');
  932. if (!$neg) {
  933. $db_query->addExpression(':score', 'score', array(':score' => 1));
  934. }
  935. if (isset($distinct)) {
  936. $db_query->distinct();
  937. }
  938. }
  939. if ($conj == 'AND') {
  940. foreach ($negated as $k) {
  941. $db_query->condition('t.item_id', $this->createKeysQuery($k, $fields, $all_fields), 'NOT IN');
  942. }
  943. }
  944. else {
  945. $or = db_or();
  946. foreach ($negated as $k) {
  947. $or->condition('t.item_id', $this->createKeysQuery($k, $fields, $all_fields), 'NOT IN');
  948. }
  949. if (isset($old_query)) {
  950. $or->condition('t.item_id', $old_query, 'NOT IN');
  951. }
  952. $db_query->condition($or);
  953. }
  954. }
  955. return $db_query;
  956. }
  957. /**
  958. * Helper method for finding any needed table for a filter query.
  959. */
  960. protected function findTable(array $filters, array $fields) {
  961. foreach ($filters as $filter) {
  962. if (is_array($filter)) {
  963. return $fields[$filter[0]]['table'];
  964. }
  965. }
  966. foreach ($filters as $filter) {
  967. if (is_object($filter)) {
  968. $ret = $this->findTable($filter->getFilters(), $fields);
  969. if ($ret) {
  970. return $ret;
  971. }
  972. }
  973. }
  974. }
  975. /**
  976. * Helper method for creating a condition for filtering search results.
  977. *
  978. * @return QueryConditionInterface
  979. */
  980. protected function createFilterCondition(SearchApiQueryFilterInterface $filter, array $fields, SelectQueryInterface $db_query) {
  981. $cond = db_condition($filter->getConjunction());
  982. $empty = TRUE;
  983. foreach ($filter->getFilters() as $f) {
  984. if (is_object($f)) {
  985. $c = $this->createFilterCondition($f, $fields, $db_query);
  986. if ($c) {
  987. $empty = FALSE;
  988. $cond->condition($c);
  989. }
  990. }
  991. else {
  992. $empty = FALSE;
  993. if (!isset($fields[$f[0]])) {
  994. throw new SearchApiException(t('Unknown field in filter clause: @field.', array('@field' => $f[0])));
  995. }
  996. $field = $fields[$f[0]];
  997. if ($f[1] === NULL) {
  998. $query = db_select($field['table'], 't')
  999. ->fields('t', array('item_id'));
  1000. $cond->condition('t.item_id', $query, $f[2] == '<>' || $f[2] == '!=' ? 'IN' : 'NOT IN');
  1001. continue;
  1002. }
  1003. if (search_api_is_text_type($field['type'])) {
  1004. $keys = $this->prepareKeys($f[1]);
  1005. $query = $this->createKeysQuery($keys, array($field), $fields);
  1006. // We don't need the score.
  1007. $query_expressions = &$query->getExpressions();
  1008. $query_expressions = array();
  1009. $cond->condition('t.item_id', $query, $f[2] == '<>' || $f[2] == '!=' ? 'NOT IN' : 'IN');
  1010. }
  1011. else {
  1012. $alias = $this->getTableAlias($field, $db_query, search_api_is_list_type($field['type']));
  1013. $cond->condition($alias . '.value', $f[1], $f[2]);
  1014. }
  1015. }
  1016. }
  1017. return $empty ? NULL : $cond;
  1018. }
  1019. /**
  1020. * Helper method for adding a field's table to a database query.
  1021. *
  1022. * @param array $field
  1023. * The field information array. The "table" key should contain the table
  1024. * name to which a join should be made.
  1025. * @param SelectQueryInterface $db_query
  1026. * The database query used.
  1027. * @param $newjoin
  1028. * If TRUE, a join is done even if the table was already joined to in the
  1029. * query.
  1030. */
  1031. protected function getTableAlias(array $field, SelectQueryInterface $db_query, $newjoin = FALSE) {
  1032. if(!$newjoin) {
  1033. foreach ($db_query->getTables() as $alias => $info) {
  1034. $table = $info['table'];
  1035. if (is_scalar($table) && $table == $field['table']) {
  1036. return $alias;
  1037. }
  1038. }
  1039. }
  1040. return $db_query->join($field['table'], 't', 't.item_id = %alias.item_id');
  1041. }
  1042. /**
  1043. * Helper method for getting the facet values for a query.
  1044. */
  1045. protected function getFacets(SearchApiQueryInterface $query, SelectQueryInterface $db_query) {
  1046. // We only need the id field, not the score.
  1047. $fields = &$db_query->getFields();
  1048. unset($fields['score']);
  1049. if (count($fields) != 1 || !isset($fields['item_id'])) {
  1050. $this->warnings[] = t('Error while adding facets: only "item_id" field should be used, used are: @fields.',
  1051. array('@fields' => implode(', ', array_keys($fields))));
  1052. return array();
  1053. }
  1054. $expressions = &$db_query->getExpressions();
  1055. $expressions = array();
  1056. $db_query->distinct();
  1057. if (!$db_query->preExecute()) {
  1058. return array();
  1059. }
  1060. $args = $db_query->getArguments();
  1061. $table = db_query_temporary((string) $db_query, $args, $this->query_options);
  1062. $fields = $this->options['indexes'][$query->getIndex()->machine_name];
  1063. $ret = array();
  1064. foreach ($query->getOption('search_api_facets') as $key => $facet) {
  1065. if (empty($fields[$facet['field']])) {
  1066. $this->warnings[] = t('Unknown facet field @field.', array('@field' => $facet['field']));
  1067. continue;
  1068. }
  1069. $field = $fields[$facet['field']];
  1070. $missing_count = 0;
  1071. $select = db_select($table, 't');
  1072. $alias = $this->getTableAlias($field, $select, TRUE);
  1073. $select->addField($alias, search_api_is_text_type($field['type']) ? 'word' : 'value', 'value');
  1074. $select->addExpression('COUNT(DISTINCT t.item_id)', 'num');
  1075. $select->groupBy('value');
  1076. $select->orderBy('num', 'DESC');
  1077. $limit = $facet['limit'];
  1078. if ((int) $limit > 0) {
  1079. $select->range(0, $limit);
  1080. }
  1081. if ($facet['min_count'] > 1) {
  1082. $select->having('num >= :count', array(':count' => $facet['min_count']));
  1083. }
  1084. if ($facet['missing']) {
  1085. // The "missing" facet is defined by the table not having any entries
  1086. // for those items. We therefore need to execute an additional query,
  1087. // counting the items which do not have any entries in the field table.
  1088. if ($facet['missing']) {
  1089. $inner_query = db_select($field['table'], 't1')
  1090. ->fields('t1', array('item_id'));
  1091. $missing_count = db_select($table, 't');
  1092. $missing_count->addExpression('COUNT(item_id)');
  1093. $missing_count->condition('item_id', $inner_query, 'NOT IN');
  1094. $missing_count = $missing_count->execute()->fetchField();
  1095. $missing_count = $missing_count >= $facet['min_count'] ? $missing_count : 0;
  1096. }
  1097. }
  1098. $terms = array();
  1099. foreach ($select->execute() as $row) {
  1100. if ($missing_count && $missing_count > $row->num) {
  1101. $terms[] = array(
  1102. 'count' => $missing_count,
  1103. 'filter' => '!',
  1104. );
  1105. $missing_count = 0;
  1106. if ($limit && count($terms) == $limit) {
  1107. break;
  1108. }
  1109. }
  1110. $terms[] = array(
  1111. 'count' => $row->num,
  1112. 'filter' => isset($row->value) ? '"' . $row->value . '"' : '!',
  1113. );
  1114. }
  1115. if ($missing_count && (!$limit || count($terms) < $limit)) {
  1116. $terms[] = array(
  1117. 'count' => $missing_count,
  1118. 'filter' => '!',
  1119. );
  1120. }
  1121. $ret[$key] = $terms;
  1122. }
  1123. return $ret;
  1124. }
  1125. /**
  1126. * Helper method for setting the database to the one selected by the user.
  1127. */
  1128. protected function setDb() {
  1129. if (!isset($this->previous_db)) {
  1130. list($key, $target) = explode(':', $this->options['database'], 2);
  1131. $this->previous_db = db_set_active($key);
  1132. if (!isset($this->query_options)) {
  1133. $this->query_options = array('target' => $target);
  1134. }
  1135. return TRUE;
  1136. }
  1137. return FALSE;
  1138. }
  1139. /**
  1140. * Helper method for resetting the original database.
  1141. */
  1142. protected function resetDb() {
  1143. if (isset($this->previous_db)) {
  1144. db_set_active($this->previous_db);
  1145. $this->previous_db = NULL;
  1146. return TRUE;
  1147. }
  1148. return FALSE;
  1149. }
  1150. }