processor.inc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. <?php
  2. /**
  3. * Interface representing a Search API pre- and/or post-processor.
  4. *
  5. * While processors are enabled or disabled for both pre- and postprocessing at
  6. * once, many processors will only need to run in one of those two phases. Then,
  7. * the other method(s) should simply be left blank. A processor should make it
  8. * clear in its description or documentation when it will run and what effect it
  9. * will have.
  10. * Usually, processors preprocessing indexed items will likewise preprocess
  11. * search queries, so these two methods should mostly be implemented either both
  12. * or neither.
  13. */
  14. interface SearchApiProcessorInterface {
  15. /**
  16. * Construct a processor.
  17. *
  18. * @param SearchApiIndex $index
  19. * The index for which processing is done.
  20. * @param array $options
  21. * The processor options set for this index.
  22. */
  23. public function __construct(SearchApiIndex $index, array $options = array());
  24. /**
  25. * Check whether this processor is applicable for a certain index.
  26. *
  27. * This can be used for hiding the processor on the index's "Workflow" tab. To
  28. * avoid confusion, you should only use criteria that are immutable, such as
  29. * the index's item type. Also, since this is only used for UI purposes, you
  30. * should not completely rely on this to ensure certain index configurations
  31. * and at least throw an exception with a descriptive error message if this is
  32. * violated on runtime.
  33. *
  34. * @param SearchApiIndex $index
  35. * The index to check for.
  36. *
  37. * @return boolean
  38. * TRUE if the processor can run on the given index; FALSE otherwise.
  39. */
  40. public function supportsIndex(SearchApiIndex $index);
  41. /**
  42. * Display a form for configuring this processor.
  43. * Since forcing users to specify options for disabled processors makes no
  44. * sense, none of the form elements should have the '#required' attribute set.
  45. *
  46. * @return array
  47. * A form array for configuring this processor, or FALSE if no configuration
  48. * is possible.
  49. */
  50. public function configurationForm();
  51. /**
  52. * Validation callback for the form returned by configurationForm().
  53. *
  54. * @param array $form
  55. * The form returned by configurationForm().
  56. * @param array $values
  57. * The part of the $form_state['values'] array corresponding to this form.
  58. * @param array $form_state
  59. * The complete form state.
  60. */
  61. public function configurationFormValidate(array $form, array &$values, array &$form_state);
  62. /**
  63. * Submit callback for the form returned by configurationForm().
  64. *
  65. * This method should both return the new options and set them internally.
  66. *
  67. * @param array $form
  68. * The form returned by configurationForm().
  69. * @param array $values
  70. * The part of the $form_state['values'] array corresponding to this form.
  71. * @param array $form_state
  72. * The complete form state.
  73. *
  74. * @return array
  75. * The new options array for this callback.
  76. */
  77. public function configurationFormSubmit(array $form, array &$values, array &$form_state);
  78. /**
  79. * Preprocess data items for indexing.
  80. *
  81. * Typically, a preprocessor will execute its preprocessing (e.g. stemming,
  82. * n-grams, word splitting, stripping stop words, etc.) only on the items'
  83. * search_api_fulltext fields, if set. Other fields should usually be left
  84. * untouched.
  85. *
  86. * @param array $items
  87. * An array of items to be preprocessed for indexing, formatted as specified
  88. * by SearchApiServiceInterface::indexItems().
  89. */
  90. public function preprocessIndexItems(array &$items);
  91. /**
  92. * Preprocess a search query.
  93. *
  94. * The same applies as when preprocessing indexed items: typically, only the
  95. * fulltext search keys should be processed, queries on specific fields should
  96. * usually not be altered.
  97. *
  98. * @param SearchApiQuery $query
  99. * The object representing the query to be executed.
  100. */
  101. public function preprocessSearchQuery(SearchApiQuery $query);
  102. /**
  103. * Postprocess search results before display.
  104. *
  105. * If a class is used for both pre- and post-processing a search query, the
  106. * same object will be used for both calls (so preserving some data or state
  107. * locally is possible).
  108. *
  109. * @param array $response
  110. * An array containing the search results. See the return value of
  111. * SearchApiQueryInterface->execute() for the detailed format.
  112. * @param SearchApiQuery $query
  113. * The object representing the executed query.
  114. */
  115. public function postprocessSearchResults(array &$response, SearchApiQuery $query);
  116. }
  117. /**
  118. * Abstract processor implementation that provides an easy framework for only
  119. * processing specific fields.
  120. *
  121. * Simple processors can just override process(), while others might want to
  122. * override the other process*() methods, and test*() (for restricting
  123. * processing to something other than all fulltext data).
  124. */
  125. abstract class SearchApiAbstractProcessor implements SearchApiProcessorInterface {
  126. /**
  127. * @var SearchApiIndex
  128. */
  129. protected $index;
  130. /**
  131. * @var array
  132. */
  133. protected $options;
  134. /**
  135. * Constructor, saving its arguments into properties.
  136. */
  137. public function __construct(SearchApiIndex $index, array $options = array()) {
  138. $this->index = $index;
  139. $this->options = $options;
  140. }
  141. public function supportsIndex(SearchApiIndex $index) {
  142. return TRUE;
  143. }
  144. public function configurationForm() {
  145. $form['#attached']['css'][] = drupal_get_path('module', 'search_api') . '/search_api.admin.css';
  146. $fields = $this->index->getFields();
  147. $field_options = array();
  148. $default_fields = array();
  149. if (isset($this->options['fields'])) {
  150. $default_fields = drupal_map_assoc(array_keys($this->options['fields']));
  151. }
  152. foreach ($fields as $name => $field) {
  153. $field_options[$name] = $field['name'];
  154. if (!empty($default_fields[$name]) || (!isset($this->options['fields']) && $this->testField($name, $field))) {
  155. $default_fields[$name] = $name;
  156. }
  157. }
  158. $form['fields'] = array(
  159. '#type' => 'checkboxes',
  160. '#title' => t('Fields to run on'),
  161. '#options' => $field_options,
  162. '#default_value' => $default_fields,
  163. '#attributes' => array('class' => array('search-api-checkboxes-list')),
  164. );
  165. return $form;
  166. }
  167. public function configurationFormValidate(array $form, array &$values, array &$form_state) {
  168. $fields = array_filter($values['fields']);
  169. if ($fields) {
  170. $fields = array_combine($fields, array_fill(0, count($fields), TRUE));
  171. }
  172. $values['fields'] = $fields;
  173. }
  174. public function configurationFormSubmit(array $form, array &$values, array &$form_state) {
  175. $this->options = $values;
  176. return $values;
  177. }
  178. /**
  179. * Calls processField() for all appropriate fields.
  180. */
  181. public function preprocessIndexItems(array &$items) {
  182. foreach ($items as &$item) {
  183. foreach ($item as $name => &$field) {
  184. if ($this->testField($name, $field)) {
  185. $this->processField($field['value'], $field['type']);
  186. }
  187. }
  188. }
  189. }
  190. /**
  191. * Calls processKeys() for the keys and processFilters() for the filters.
  192. */
  193. public function preprocessSearchQuery(SearchApiQuery $query) {
  194. $keys = &$query->getKeys();
  195. $this->processKeys($keys);
  196. $filter = $query->getFilter();
  197. $filters = &$filter->getFilters();
  198. $this->processFilters($filters);
  199. }
  200. /**
  201. * Does nothing.
  202. */
  203. public function postprocessSearchResults(array &$response, SearchApiQuery $query) {
  204. return;
  205. }
  206. /**
  207. * Method for preprocessing field data.
  208. *
  209. * Calls process() either for the whole text, or each token, depending on the
  210. * type. Also takes care of extracting list values and of fusing returned
  211. * tokens back into a one-dimensional array.
  212. */
  213. protected function processField(&$value, &$type) {
  214. if (!isset($value) || $value === '') {
  215. return;
  216. }
  217. if (substr($type, 0, 5) == 'list<') {
  218. $inner_type = $t = $t1 = substr($type, 5, -1);
  219. foreach ($value as &$v) {
  220. $t1 = $inner_type;
  221. $this->processField($v, $t1);
  222. // If one value got tokenized, all others have to follow.
  223. if ($t1 != $inner_type) {
  224. $t = $t1;
  225. }
  226. }
  227. if ($t == 'tokens') {
  228. foreach ($value as $i => &$v) {
  229. if (!$v) {
  230. unset($value[$i]);
  231. continue;
  232. }
  233. if (!is_array($v)) {
  234. $v = array(array('value' => $v, 'score' => 1));
  235. }
  236. }
  237. }
  238. $type = "list<$t>";
  239. return;
  240. }
  241. if ($type == 'tokens') {
  242. foreach ($value as &$token) {
  243. $this->processFieldValue($token['value']);
  244. }
  245. }
  246. else {
  247. $this->processFieldValue($value);
  248. }
  249. if (is_array($value)) {
  250. $type = 'tokens';
  251. $value = $this->normalizeTokens($value);
  252. }
  253. }
  254. /**
  255. * Internal helper function for normalizing tokens.
  256. */
  257. protected function normalizeTokens($tokens, $score = 1) {
  258. $ret = array();
  259. foreach ($tokens as $token) {
  260. if (empty($token['value']) && !is_numeric($token['value'])) {
  261. // Filter out empty tokens.
  262. continue;
  263. }
  264. if (!isset($token['score'])) {
  265. $token['score'] = $score;
  266. }
  267. else {
  268. $token['score'] *= $score;
  269. }
  270. if (is_array($token['value'])) {
  271. foreach ($this->normalizeTokens($token['value'], $token['score']) as $t) {
  272. $ret[] = $t;
  273. }
  274. }
  275. else {
  276. $ret[] = $token;
  277. }
  278. }
  279. return $ret;
  280. }
  281. /**
  282. * Method for preprocessing search keys.
  283. */
  284. protected function processKeys(&$keys) {
  285. if (is_array($keys)) {
  286. foreach ($keys as $key => &$v) {
  287. if (element_child($key)) {
  288. $this->processKeys($v);
  289. if (!$v && !is_numeric($v)) {
  290. unset($keys[$key]);
  291. }
  292. }
  293. }
  294. }
  295. else {
  296. $this->processKey($keys);
  297. }
  298. }
  299. /**
  300. * Method for preprocessing query filters.
  301. */
  302. protected function processFilters(array &$filters) {
  303. $fields = $this->index->options['fields'];
  304. foreach ($filters as &$f) {
  305. if (is_array($f)) {
  306. if (isset($fields[$f[0]]) && $this->testField($f[0], $fields[$f[0]])) {
  307. $this->processFilterValue($f[1]);
  308. }
  309. }
  310. else {
  311. $child_filters = &$f->getFilters();
  312. $this->processFilters($child_filters);
  313. }
  314. }
  315. }
  316. /**
  317. * @param $name
  318. * The field's machine name.
  319. * @param array $field
  320. * The field's information.
  321. *
  322. * @return
  323. * TRUE, iff the field should be processed.
  324. */
  325. protected function testField($name, array $field) {
  326. if (empty($this->options['fields'])) {
  327. return $this->testType($field['type']);
  328. }
  329. return !empty($this->options['fields'][$name]);
  330. }
  331. /**
  332. * @return
  333. * TRUE, iff the type should be processed.
  334. */
  335. protected function testType($type) {
  336. return search_api_is_text_type($type, array('text', 'tokens'));
  337. }
  338. /**
  339. * Called for processing a single text element in a field. The default
  340. * implementation just calls process().
  341. *
  342. * $value can either be left a string, or changed into an array of tokens. A
  343. * token is an associative array containing:
  344. * - value: Either the text inside the token, or a nested array of tokens. The
  345. * score of nested tokens will be multiplied by their parent's score.
  346. * - score: The relative importance of the token, as a float, with 1 being
  347. * the default.
  348. */
  349. protected function processFieldValue(&$value) {
  350. $this->process($value);
  351. }
  352. /**
  353. * Called for processing a single search keyword. The default implementation
  354. * just calls process().
  355. *
  356. * $value can either be left a string, or be changed into a nested keys array,
  357. * as defined by SearchApiQueryInterface.
  358. */
  359. protected function processKey(&$value) {
  360. $this->process($value);
  361. }
  362. /**
  363. * Called for processing a single filter value. The default implementation
  364. * just calls process().
  365. *
  366. * $value has to remain a string.
  367. */
  368. protected function processFilterValue(&$value) {
  369. $this->process($value);
  370. }
  371. /**
  372. * Function that is ultimately called for all text by the standard
  373. * implementation, and does nothing by default.
  374. *
  375. * @param $value
  376. * The value to preprocess as a string. Can be manipulated directly, nothing
  377. * has to be returned. Since this can be called for all value types, $value
  378. * has to remain a string.
  379. */
  380. protected function process(&$value) {
  381. }
  382. }