<?php

/**
 * @file
 * Contains SearchApiProcessorInterface and SearchApiAbstractProcessor.
 */
  6. /**
  7. * Interface representing a Search API pre- and/or post-processor.
  8. *
  9. * While processors are enabled or disabled for both pre- and postprocessing at
  10. * once, many processors will only need to run in one of those two phases. Then,
  11. * the other method(s) should simply be left blank. A processor should make it
  12. * clear in its description or documentation when it will run and what effect it
  13. * will have.
  14. * Usually, processors preprocessing indexed items will likewise preprocess
  15. * search queries, so these two methods should mostly be implemented either both
  16. * or neither.
  17. */
  18. interface SearchApiProcessorInterface {
  19. /**
  20. * Construct a processor.
  21. *
  22. * @param SearchApiIndex $index
  23. * The index for which processing is done.
  24. * @param array $options
  25. * The processor options set for this index.
  26. */
  27. public function __construct(SearchApiIndex $index, array $options = array());
  28. /**
  29. * Check whether this processor is applicable for a certain index.
  30. *
  31. * This can be used for hiding the processor on the index's "Filters" tab. To
  32. * avoid confusion, you should only use criteria that are immutable, such as
  33. * the index's item type. Also, since this is only used for UI purposes, you
  34. * should not completely rely on this to ensure certain index configurations
  35. * and at least throw an exception with a descriptive error message if this is
  36. * violated on runtime.
  37. *
  38. * @param SearchApiIndex $index
  39. * The index to check for.
  40. *
  41. * @return boolean
  42. * TRUE if the processor can run on the given index; FALSE otherwise.
  43. */
  44. public function supportsIndex(SearchApiIndex $index);
  45. /**
  46. * Display a form for configuring this processor.
  47. * Since forcing users to specify options for disabled processors makes no
  48. * sense, none of the form elements should have the '#required' attribute set.
  49. *
  50. * @return array
  51. * A form array for configuring this processor, or FALSE if no configuration
  52. * is possible.
  53. */
  54. public function configurationForm();
  55. /**
  56. * Validation callback for the form returned by configurationForm().
  57. *
  58. * @param array $form
  59. * The form returned by configurationForm().
  60. * @param array $values
  61. * The part of the $form_state['values'] array corresponding to this form.
  62. * @param array $form_state
  63. * The complete form state.
  64. */
  65. public function configurationFormValidate(array $form, array &$values, array &$form_state);
  66. /**
  67. * Submit callback for the form returned by configurationForm().
  68. *
  69. * This method should both return the new options and set them internally.
  70. *
  71. * @param array $form
  72. * The form returned by configurationForm().
  73. * @param array $values
  74. * The part of the $form_state['values'] array corresponding to this form.
  75. * @param array $form_state
  76. * The complete form state.
  77. *
  78. * @return array
  79. * The new options array for this callback.
  80. */
  81. public function configurationFormSubmit(array $form, array &$values, array &$form_state);
  82. /**
  83. * Preprocess data items for indexing.
  84. *
  85. * Typically, a preprocessor will execute its preprocessing (e.g. stemming,
  86. * n-grams, word splitting, stripping stop words, etc.) only on the items'
  87. * search_api_fulltext fields, if set. Other fields should usually be left
  88. * untouched.
  89. *
  90. * @param array $items
  91. * An array of items to be preprocessed for indexing, formatted as specified
  92. * by SearchApiServiceInterface::indexItems().
  93. */
  94. public function preprocessIndexItems(array &$items);
  95. /**
  96. * Preprocess a search query.
  97. *
  98. * The same applies as when preprocessing indexed items: typically, only the
  99. * fulltext search keys should be processed, queries on specific fields should
  100. * usually not be altered.
  101. *
  102. * @param SearchApiQuery $query
  103. * The object representing the query to be executed.
  104. */
  105. public function preprocessSearchQuery(SearchApiQuery $query);
  106. /**
  107. * Postprocess search results before display.
  108. *
  109. * If a class is used for both pre- and post-processing a search query, the
  110. * same object will be used for both calls (so preserving some data or state
  111. * locally is possible).
  112. *
  113. * @param array $response
  114. * An array containing the search results. See the return value of
  115. * SearchApiQueryInterface->execute() for the detailed format.
  116. * @param SearchApiQuery $query
  117. * The object representing the executed query.
  118. */
  119. public function postprocessSearchResults(array &$response, SearchApiQuery $query);
  120. }
  121. /**
  122. * Abstract processor implementation that provides an easy framework for only
  123. * processing specific fields.
  124. *
  125. * Simple processors can just override process(), while others might want to
  126. * override the other process*() methods, and test*() (for restricting
  127. * processing to something other than all fulltext data).
  128. */
  129. abstract class SearchApiAbstractProcessor implements SearchApiProcessorInterface {
  130. /**
  131. * @var SearchApiIndex
  132. */
  133. protected $index;
  134. /**
  135. * @var array
  136. */
  137. protected $options;
  138. /**
  139. * Constructor, saving its arguments into properties.
  140. */
  141. public function __construct(SearchApiIndex $index, array $options = array()) {
  142. $this->index = $index;
  143. $this->options = $options;
  144. }
  145. public function supportsIndex(SearchApiIndex $index) {
  146. return TRUE;
  147. }
  148. public function configurationForm() {
  149. $form['#attached']['css'][] = drupal_get_path('module', 'search_api') . '/search_api.admin.css';
  150. $fields = $this->index->getFields();
  151. $field_options = array();
  152. $default_fields = array();
  153. if (isset($this->options['fields'])) {
  154. $default_fields = drupal_map_assoc(array_keys($this->options['fields']));
  155. }
  156. foreach ($fields as $name => $field) {
  157. $field_options[$name] = check_plain($field['name']);
  158. if (!empty($default_fields[$name]) || (!isset($this->options['fields']) && $this->testField($name, $field))) {
  159. $default_fields[$name] = $name;
  160. }
  161. }
  162. $form['fields'] = array(
  163. '#type' => 'checkboxes',
  164. '#title' => t('Fields to run on'),
  165. '#options' => $field_options,
  166. '#default_value' => $default_fields,
  167. '#attributes' => array('class' => array('search-api-checkboxes-list')),
  168. );
  169. return $form;
  170. }
  171. public function configurationFormValidate(array $form, array &$values, array &$form_state) {
  172. $fields = array_filter($values['fields']);
  173. if ($fields) {
  174. $fields = array_fill_keys($fields, TRUE);
  175. }
  176. $values['fields'] = $fields;
  177. }
  178. public function configurationFormSubmit(array $form, array &$values, array &$form_state) {
  179. $this->options = $values;
  180. return $values;
  181. }
  182. /**
  183. * Calls processField() for all appropriate fields.
  184. */
  185. public function preprocessIndexItems(array &$items) {
  186. foreach ($items as &$item) {
  187. foreach ($item as $name => &$field) {
  188. if ($this->testField($name, $field)) {
  189. $this->processField($field['value'], $field['type']);
  190. }
  191. }
  192. }
  193. }
  194. /**
  195. * Calls processKeys() for the keys and processFilters() for the filters.
  196. */
  197. public function preprocessSearchQuery(SearchApiQuery $query) {
  198. $keys = &$query->getKeys();
  199. $this->processKeys($keys);
  200. $filter = $query->getFilter();
  201. $filters = &$filter->getFilters();
  202. $this->processFilters($filters);
  203. }
  204. /**
  205. * Does nothing.
  206. */
  207. public function postprocessSearchResults(array &$response, SearchApiQuery $query) {
  208. return;
  209. }
  210. /**
  211. * Method for preprocessing field data.
  212. *
  213. * Calls process() either for the whole text, or each token, depending on the
  214. * type. Also takes care of extracting list values and of fusing returned
  215. * tokens back into a one-dimensional array.
  216. */
  217. protected function processField(&$value, &$type) {
  218. if (!isset($value) || $value === '') {
  219. return;
  220. }
  221. if (substr($type, 0, 5) == 'list<') {
  222. $inner_type = $t = $t1 = substr($type, 5, -1);
  223. foreach ($value as &$v) {
  224. $t1 = $inner_type;
  225. $this->processField($v, $t1);
  226. // If one value got tokenized, all others have to follow.
  227. if ($t1 != $inner_type) {
  228. $t = $t1;
  229. }
  230. }
  231. if ($t == 'tokens') {
  232. foreach ($value as $i => &$v) {
  233. if (!$v) {
  234. unset($value[$i]);
  235. continue;
  236. }
  237. if (!is_array($v)) {
  238. $v = array(array('value' => $v, 'score' => 1));
  239. }
  240. }
  241. }
  242. $type = "list<$t>";
  243. return;
  244. }
  245. if ($type == 'tokens') {
  246. foreach ($value as &$token) {
  247. $this->processFieldValue($token['value']);
  248. }
  249. }
  250. else {
  251. $this->processFieldValue($value);
  252. }
  253. if (is_array($value)) {
  254. // Don't tokenize non-fulltext content!
  255. if (in_array($type, array('text', 'tokens'))) {
  256. $type = 'tokens';
  257. $value = $this->normalizeTokens($value);
  258. }
  259. else {
  260. $value = $this->implodeTokens($value);
  261. }
  262. }
  263. }
  264. /**
  265. * Internal helper function for normalizing tokens.
  266. */
  267. protected function normalizeTokens($tokens, $score = 1) {
  268. $ret = array();
  269. foreach ($tokens as $token) {
  270. if (empty($token['value']) && !is_numeric($token['value'])) {
  271. // Filter out empty tokens.
  272. continue;
  273. }
  274. if (!isset($token['score'])) {
  275. $token['score'] = $score;
  276. }
  277. else {
  278. $token['score'] *= $score;
  279. }
  280. if (is_array($token['value'])) {
  281. foreach ($this->normalizeTokens($token['value'], $token['score']) as $t) {
  282. $ret[] = $t;
  283. }
  284. }
  285. else {
  286. $ret[] = $token;
  287. }
  288. }
  289. return $ret;
  290. }
  291. /**
  292. * Internal helper function for imploding tokens into a single string.
  293. *
  294. * @param array $tokens
  295. * The tokens array to implode.
  296. *
  297. * @return string
  298. * The text data from the tokens concatenated into a single string.
  299. */
  300. protected function implodeTokens(array $tokens) {
  301. $ret = array();
  302. foreach ($tokens as $token) {
  303. if (empty($token['value']) && !is_numeric($token['value'])) {
  304. // Filter out empty tokens.
  305. continue;
  306. }
  307. if (is_array($token['value'])) {
  308. $ret[] = $this->implodeTokens($token['value']);
  309. }
  310. else {
  311. $ret[] = $token['value'];
  312. }
  313. }
  314. return implode(' ', $ret);
  315. }
  316. /**
  317. * Method for preprocessing search keys.
  318. */
  319. protected function processKeys(&$keys) {
  320. if (is_array($keys)) {
  321. foreach ($keys as $key => &$v) {
  322. if (element_child($key)) {
  323. $this->processKeys($v);
  324. if (!$v && !is_numeric($v)) {
  325. unset($keys[$key]);
  326. }
  327. }
  328. }
  329. }
  330. else {
  331. $this->processKey($keys);
  332. }
  333. }
  334. /**
  335. * Method for preprocessing query filters.
  336. */
  337. protected function processFilters(array &$filters) {
  338. $fields = $this->index->options['fields'];
  339. foreach ($filters as $key => &$f) {
  340. if (is_array($f)) {
  341. if (isset($fields[$f[0]]) && $this->testField($f[0], $fields[$f[0]])) {
  342. // We want to allow processors also to easily remove complete filters.
  343. // However, we can't use empty() or the like, as that would sort out
  344. // filters for 0 or NULL. So we specifically check only for the empty
  345. // string, and we also make sure the filter value was actually changed
  346. // by storing whether it was empty before.
  347. $empty_string = $f[1] === '';
  348. $this->processFilterValue($f[1]);
  349. if ($f[1] === '' && !$empty_string) {
  350. unset($filters[$key]);
  351. }
  352. }
  353. }
  354. else {
  355. $child_filters = &$f->getFilters();
  356. $this->processFilters($child_filters);
  357. }
  358. }
  359. }
  360. /**
  361. * Determines whether to process data from the given field.
  362. *
  363. * @param $name
  364. * The field's machine name.
  365. * @param array $field
  366. * The field's information.
  367. *
  368. * @return bool
  369. * TRUE, if the field should be processed, FALSE otherwise.
  370. */
  371. protected function testField($name, array $field) {
  372. if (empty($this->options['fields'])) {
  373. return $this->testType($field['type']);
  374. }
  375. return !empty($this->options['fields'][$name]);
  376. }
  377. /**
  378. * Determines whether fields of the given type should normally be processed.
  379. *
  380. * Defaults to processing text types, but can easily be overridden by
  381. * subclasses.
  382. *
  383. * @return bool
  384. * TRUE, if the type should be processed, FALSE otherwise.
  385. */
  386. protected function testType($type) {
  387. return search_api_is_text_type($type, array('text', 'tokens'));
  388. }
  389. /**
  390. * Called for processing a single text element in a field. The default
  391. * implementation just calls process().
  392. *
  393. * $value can either be left a string, or changed into an array of tokens. A
  394. * token is an associative array containing:
  395. * - value: Either the text inside the token, or a nested array of tokens. The
  396. * score of nested tokens will be multiplied by their parent's score.
  397. * - score: The relative importance of the token, as a float, with 1 being
  398. * the default.
  399. */
  400. protected function processFieldValue(&$value) {
  401. $this->process($value);
  402. }
  403. /**
  404. * Called for processing a single search keyword. The default implementation
  405. * just calls process().
  406. *
  407. * $value can either be left a string, or be changed into a nested keys array,
  408. * as defined by SearchApiQueryInterface.
  409. */
  410. protected function processKey(&$value) {
  411. $this->process($value);
  412. }
  413. /**
  414. * Called for processing a single filter value. The default implementation
  415. * just calls process().
  416. *
  417. * $value has to remain a string.
  418. */
  419. protected function processFilterValue(&$value) {
  420. $this->process($value);
  421. }
  422. /**
  423. * Function that is ultimately called for all text by the standard
  424. * implementation, and does nothing by default.
  425. *
  426. * @param $value
  427. * The value to preprocess as a string. Can be manipulated directly, nothing
  428. * has to be returned. Since this can be called for all value types, $value
  429. * has to remain a string.
  430. */
  431. protected function process(&$value) {
  432. }
  433. }