FeedsJSONPathParser.inc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. <?php
  2. /**
  3. * @file
  4. * Provides the Class for Feeds JSONPath Parser.
  5. */
/**
 * Feeds parser that extracts items from JSON documents using JSONPath.
 */
  9. class FeedsJSONPathParser extends FeedsParser {
  /**
   * Names of the queries whose results are printed as debug messages.
   *
   * Assigned by parse() from the enabled debug options and read by debug().
   *
   * @var array
   */
  protected $debug;
  /**
   * Implements FeedsParser::parse().
   *
   * Decodes the fetched document as JSON, runs the configured context
   * expression against it and then runs every configured source expression
   * against each item the context expression produced.
   *
   * @param FeedsSource $source
   *   The feed source being imported.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result that holds the raw document.
   *
   * @return FeedsParserResult
   *   The parser result. Each entry of its items array is keyed by mapping
   *   source and holds the value(s) the matching expression returned.
   *
   * @throws Exception
   *   If the document cannot be decoded as JSON, or if the JSONPath library
   *   file cannot be found next to this file.
   */
  public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
    $mappings = $this->getOwnMappings();
    $source_config = $source->getConfigFor($this);
    // Allow config inheritance.
    if (empty($source_config)) {
      $source_config = $this->config;
    }

    // The debug options may be missing (the default for 'debug' is an empty
    // array), so fall back to "nothing enabled" instead of reading an
    // undefined index and filtering NULL.
    $debug_options = array();
    if (isset($source_config['debug']['options']) && is_array($source_config['debug']['options'])) {
      $debug_options = $source_config['debug']['options'];
    }
    $this->debug = array_keys(array_filter($debug_options));

    $raw = trim($fetcher_result->getRaw());
    $result = new FeedsParserResult();

    // Set link so we can set the result link attribute.
    $fetcher_config = $source->getConfigFor($source->importer->fetcher);
    if (isset($fetcher_config['source'])) {
      $result->link = $fetcher_config['source'];
    }

    // utf8_encode() treats its input as ISO-8859-1, so running it on a
    // document that already is UTF-8 mangles every multi-byte character.
    // Only convert documents that are not valid UTF-8.
    $is_utf8 = (preg_match('//u', $raw) === 1);
    $array = json_decode($is_utf8 ? $raw : utf8_encode($raw), TRUE);

    // Support JSON lines format.
    if (!is_array($array)) {
      $raw = '[' . preg_replace('/}\s*{/', '},{', $raw) . ']';
      $array = json_decode($is_utf8 ? $raw : utf8_encode($raw), TRUE);
    }
    unset($raw);

    if (!is_array($array)) {
      throw new Exception(t('There was an error decoding the JSON document.'));
    }

    // Load the JSONPath library that sits next to this file. Fail with a
    // readable error instead of a fatal require when it is missing.
    $files = glob(dirname(__FILE__) . '/jsonpath*.php');
    if (empty($files)) {
      throw new Exception(t('The JSONPath library could not be found.'));
    }
    require_once reset($files);

    $context = isset($source_config['context']) ? $source_config['context'] : '';
    $all_items = $this->jsonPath($array, $context);
    unset($array);
    $this->debug($all_items, 'context');

    $queries = array();
    if (isset($source_config['sources']) && is_array($source_config['sources'])) {
      $queries = $source_config['sources'];
    }

    foreach ($all_items as $item) {
      $parsed_item = $variables = array();

      // $source_key is deliberately not called $source: that name belongs to
      // the FeedsSource parameter.
      foreach ($queries as $source_key => $query) {
        // Variable substitution.
        $query = strtr($query, $variables);
        $parsed = $this->parseSourceElement($item, $query, $source_key);

        // Avoid null values.
        if (isset($parsed)) {
          // Only expose a variable for sources that still have a mapping.
          if (isset($mappings[$source_key])) {
            // Variable substitution can't handle arrays.
            $variables['{' . $mappings[$source_key] . '}'] = is_array($parsed) ? '' : $parsed;
          }
          $parsed_item[$source_key] = $parsed;
        }
      }

      if (!empty($parsed_item)) {
        $result->items[] = $parsed_item;
      }
    }

    return $result;
  }
  /**
   * Runs a JSONPath expression through the library's jsonPath() function.
   *
   * jsonPath() returns FALSE when the expression yields no results. That
   * value is swapped for an empty array here so that callers can always loop
   * over and count the return value.
   *
   * @todo
   *   Figure out error handling.
   *
   * @param array $array
   *   The input array to query.
   * @param string $expression
   *   The JSONPath expression.
   *
   * @return array
   *   Whatever jsonPath() returned, or an empty array if it returned FALSE.
   */
  protected function jsonPath($array, $expression) {
    $matches = jsonPath($array, $expression);
    if ($matches === FALSE) {
      return array();
    }
    return $matches;
  }
  /**
   * Runs one source query against one item from the context array.
   *
   * @param array $item
   *   An array containing one item from the context.
   * @param string $query
   *   A JSONPath query. An empty query is skipped.
   * @param string $source
   *   The source element that corresponds to the query. It is also the name
   *   under which the results are handed to debug().
   *
   * @return mixed
   *   NULL if the query is empty or matched nothing, the match itself if
   *   there is exactly one, otherwise the array of all matches.
   */
  protected function parseSourceElement($item, $query, $source) {
    if (empty($query)) {
      return NULL;
    }

    $matches = $this->jsonPath($item, $query);
    $this->debug($matches, $source);

    switch (count($matches)) {
      case 0:
        // Nothing matched.
        return NULL;

      case 1:
        // A single match is returned unwrapped.
        return $matches[0];

      default:
        return $matches;
    }
  }
  /**
   * Builds the JSONPath settings form.
   *
   * The form holds the context expression, one expression field per JSONPath
   * mapping and the debug checkboxes. configForm() reuses it for the importer
   * settings and marks that call with $source_config['config'].
   *
   * @param array $source_config
   *   The configuration to take default values from. When empty, this
   *   parser's own configuration is used instead.
   *
   * @return array
   *   The form array. It is empty when overriding is disabled and the form is
   *   not being built for configForm().
   */
  public function sourceForm($source_config) {
    $form = array();

    // Allow config inheritance.
    if (empty($source_config)) {
      $source_config = $this->config;
    }

    // Overriding is switched off and this is not the importer config form:
    // show nothing. An empty array is returned rather than NULL so that the
    // result is always a form array.
    if (isset($source_config['allow_override']) &&
        !$source_config['allow_override'] &&
        empty($source_config['config'])) {
      return $form;
    }

    // Add extensions that might get imported.
    $fetcher = feeds_importer($this->id)->fetcher;
    if (isset($fetcher->config['allowed_extensions'])) {
      if (strpos($fetcher->config['allowed_extensions'], 'json') === FALSE) {
        $fetcher->config['allowed_extensions'] .= ' json';
      }
    }

    // Collect the mappings that belong to this parser, and the targets among
    // them that are flagged as unique.
    $mappings_ = feeds_importer($this->id)->processor->config['mappings'];
    $uniques = $mappings = array();
    foreach ($mappings_ as $mapping) {
      if (strpos($mapping['source'], 'jsonpath_parser:') === 0) {
        $mappings[$mapping['source']] = $mapping['target'];
        // Not every mapping carries a 'unique' key.
        if (!empty($mapping['unique'])) {
          $uniques[] = $mapping['target'];
        }
      }
    }

    $form['jsonpath'] = array(
      '#type' => 'fieldset',
      '#title' => t('JSONPath Parser Settings'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
      '#tree' => TRUE,
    );

    if (empty($mappings)) {
      // Detect if Feeds menu structure has changed. feeds_ui_menu() only
      // exists while the Feeds UI module is loaded, so guard the call and use
      // the plain base path otherwise.
      $feeds_base = 'admin/structure/feeds/';
      if (function_exists('feeds_ui_menu')) {
        $feeds_menu = feeds_ui_menu();
        if (isset($feeds_menu['admin/structure/feeds/list'])) {
          $feeds_base = 'admin/structure/feeds/edit/';
        }
      }
      $form['jsonpath']['error_message']['#markup'] = '<div class="help">' . t('No JSONPath mappings are defined. Define mappings !link.', array('!link' => l(t('here'), $feeds_base . $this->id . '/mapping'))) . '</div><br />';
      return $form;
    }

    $form['jsonpath']['context'] = array(
      '#type' => 'textfield',
      '#title' => t('Context'),
      '#required' => TRUE,
      '#description' => t('This is the base query, all other queries will execute in this context.'),
      '#default_value' => isset($source_config['context']) ? $source_config['context'] : '',
      '#maxlength' => 1024,
      '#size' => 80,
    );

    $form['jsonpath']['sources'] = array(
      '#type' => 'fieldset',
    );

    if (!empty($uniques)) {
      $unique_list = implode(', ', $uniques);
      // format_plural() translates the singular and plural strings itself,
      // so they are passed untranslated together with the placeholder values.
      $items = array(
        format_plural(count($uniques),
          'Field <strong>!column</strong> is mandatory and considered unique: only one item per !column value will be created.',
          'Fields <strong>!columns</strong> are mandatory and values in these columns are considered unique: only one entry per value in one of these columns will be created.',
          array('!column' => $unique_list, '!columns' => $unique_list)),
      );
      $form['jsonpath']['sources']['help']['#markup'] = '<div class="help">' . theme('item_list', array('items' => $items)) . '</div>';
    }

    // Each field may reference the targets of the fields above it as
    // {target} variables, so the list grows as the fields are added.
    $variables = array();
    foreach ($mappings as $source => $target) {
      $form['jsonpath']['sources'][$source] = array(
        '#type' => 'textfield',
        '#title' => $target,
        '#description' => t('The JSONPath expression to execute.'),
        '#default_value' => isset($source_config['sources'][$source]) ? $source_config['sources'][$source] : '',
        '#maxlength' => 1024,
        '#size' => 80,
      );
      if (!empty($variables)) {
        $variable_text = format_plural(count($variables),
          'The variable %v is availliable for replacement.',
          'The variables %v are availliable for replacement.',
          array('%v' => implode(', ', $variables))
        );
        $form['jsonpath']['sources'][$source]['#description'] .= '<br />' . $variable_text;
      }
      $variables[] = '{' . $target . '}';
    }

    $form['jsonpath']['debug'] = array(
      '#type' => 'fieldset',
      '#title' => t('Debug'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );
    $form['jsonpath']['debug']['options'] = array(
      '#type' => 'checkboxes',
      '#title' => t('Debug query'),
      '#options' => array_merge(array('context' => 'context'), $mappings),
      '#default_value' => isset($source_config['debug']['options']) ? $source_config['debug']['options'] : array(),
    );

    return $form;
  }
  /**
   * Override parent::configForm().
   *
   * Reuses the source form for the parser configuration, makes the context
   * optional, opens the fieldset and appends the "allow override" checkbox.
   */
  public function configForm(&$form_state) {
    $settings = $this->getConfig();
    // Tells sourceForm() that it is building the configuration form.
    $settings['config'] = TRUE;

    $form = $this->sourceForm($settings);

    $fieldset = &$form['jsonpath'];
    $fieldset['context']['#required'] = FALSE;
    $fieldset['#collapsed'] = FALSE;
    $fieldset['allow_override'] = array(
      '#type' => 'checkbox',
      '#title' => t('Allow source configuration override'),
      '#description' => t('This setting allows feed nodes to specify their own JSONPath values for the context and sources.'),
      '#default_value' => $settings['allow_override'],
    );
    unset($fieldset);

    return $form;
  }
  /**
   * Override parent::getMappingSources().
   *
   * Offers one new "jsonpath_parser:N" source in front of the parent's
   * sources. N is one more than the highest number already used by this
   * parser's mappings, or 0 when there are none.
   */
  public function getMappingSources() {
    $processor_mappings = feeds_importer($this->id)->processor->config['mappings'];
    $own = $this->filterMappings($processor_mappings);

    $next = 0;
    if (!empty($own)) {
      // Collect the part after the colon of every existing source key.
      $suffixes = array();
      foreach (array_keys($own) as $source_key) {
        $parts = explode(':', $source_key);
        $suffixes[] = $parts[1];
      }
      $highest = max($suffixes);
      $next = ++$highest;
    }

    $own_source = array(
      'jsonpath_parser:' . $next => array(
        'name' => t('JSONPath Expression'),
        'description' => t('Allows you to configure a JSONPath expression that will populate this field.'),
      ),
    );

    return $own_source + parent::getMappingSources();
  }
  /**
   * Returns an empty set of source defaults.
   *
   * parse() and sourceForm() fall back to this parser's own configuration
   * when the source configuration is empty.
   *
   * @return array
   *   Always an empty array.
   */
  public function sourceDefaults() {
    $defaults = array();
    return $defaults;
  }
  /**
   * Define defaults.
   *
   * @return array
   *   The default configuration:
   *   - context: The context expression, empty by default.
   *   - sources: The source expressions, none by default.
   *   - debug: Holds the 'options' array of debug checkboxes. The 'options'
   *     key is part of the default because parse() and sourceForm() read
   *     $config['debug']['options'].
   *   - allow_override: Whether sources may override this configuration.
   */
  public function configDefaults() {
    return array(
      'context' => '',
      'sources' => array(),
      'debug' => array(
        'options' => array(),
      ),
      'allow_override' => FALSE,
    );
  }
  /**
   * Override parent::sourceFormValidate().
   *
   * If overriding is disabled, or if the values of this source are the same
   * as the base config, the values are set to blank so that they will be
   * inherited from the importer defaults.
   *
   * @param array $values
   *   The values from the form to validate, passed by reference. On return
   *   this is either an empty array or the cleaned-up JSONPath values.
   */
  public function sourceFormValidate(&$values) {
    $config = $this->getConfig();
    $allow_override = !empty($config['allow_override']);
    unset($config['allow_override']);

    // Decide this before touching the submitted values: when overriding is
    // off sourceForm() renders no JSONPath fieldset, so there is no
    // 'jsonpath' key to read and nothing to sort.
    if (!$allow_override || !isset($values['jsonpath']) || !is_array($values['jsonpath'])) {
      $values = array();
      return;
    }

    $values = $values['jsonpath'];

    // Sort both sides so that key order does not affect the comparison.
    ksort($values);
    ksort($config);
    if ($values === $config) {
      $values = array();
      return;
    }

    $this->configFormValidate($values);
  }
  /**
   * Override parent::configFormValidate().
   *
   * Unwraps the 'jsonpath' fieldset values if present and trims whitespace
   * from the context and from every source expression.
   *
   * @param array $values
   *   The values from the form to validate, passed by reference.
   */
  public function configFormValidate(&$values) {
    if (isset($values['jsonpath'])) {
      $values = $values['jsonpath'];
    }

    // The context and sources fields are not part of the form when no
    // JSONPath mappings exist, so only clean up what was submitted.
    if (isset($values['context'])) {
      $values['context'] = trim($values['context']);
    }

    if (isset($values['sources']) && is_array($values['sources'])) {
      // Write back by key rather than iterating by reference, which would
      // leave a reference to the last element behind.
      foreach ($values['sources'] as $key => $expression) {
        $values['sources'][$key] = trim($expression);
      }
    }
  }
  /**
   * Gets the mappings that belong to this parser.
   *
   * Reads the processor mappings from the importer configuration and hands
   * them to filterMappings().
   *
   * @return array
   *   An array of mappings keyed source => target.
   */
  protected function getOwnMappings() {
    $importer = feeds_importer($this->id);
    $config = $importer->getConfig();
    $processor_mappings = $config['processor']['config']['mappings'];

    return $this->filterMappings($processor_mappings);
  }
  /**
   * Filters mappings, returning the ones that belong to us.
   *
   * A mapping belongs to this parser when its source starts with
   * "jsonpath_parser:".
   *
   * @param array $mappings
   *   A mapping array from a processor.
   *
   * @return array
   *   An array of mappings keyed source => target.
   */
  protected function filterMappings($mappings) {
    $prefix = 'jsonpath_parser:';
    $prefix_length = strlen($prefix);

    $filtered = array();
    foreach ($mappings as $mapping) {
      $source_key = $mapping['source'];
      // Skip sources that do not start with our prefix.
      if (substr($source_key, 0, $prefix_length) !== $prefix) {
        continue;
      }
      $filtered[$source_key] = $mapping['target'];
    }

    return $filtered;
  }
  /**
   * Prints query results as a message when debugging is on for that query.
   *
   * @param array $item
   *   The results to print. Each entry becomes one list item.
   * @param string $source
   *   The name of the query the results belong to. Nothing is printed unless
   *   this name is listed in $this->debug.
   */
  protected function debug($item, $source) {
    if (!in_array($source, $this->debug)) {
      return;
    }

    $rows = array();
    foreach ($item as $entry) {
      $rows[] = '<li>' . check_plain(var_export($entry, TRUE)) . '</li>';
    }

    drupal_set_message($source . ':' . '<ul>' . implode('', $rows) . '</ul>');
  }
  349. }