first import
This commit is contained in:
510
sites/all/modules/feeds_xpathparser/FeedsXPathParserBase.inc
Normal file
510
sites/all/modules/feeds_xpathparser/FeedsXPathParserBase.inc
Normal file
@@ -0,0 +1,510 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Provides the base class for FeedsXPathParserHTML and FeedsXPathParserXML.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Base class for the HTML and XML parsers.
|
||||
*/
|
||||
abstract class FeedsXPathParserBase extends FeedsParser {
|
||||
|
||||
protected $modified_queries = array();
|
||||
protected $rawXML = array();
|
||||
protected $doc = NULL;
|
||||
protected $xpath = NULL;
|
||||
|
||||
/**
 * Builds the document that the XPath queries are run against.
 *
 * Every concrete parser extending FeedsXPathParserBase has to provide this.
 *
 * @param array $source_config
 *   The configuration for the source.
 * @param FeedsFetcherResult $fetcher_result
 *   The fetcher result holding the raw document.
 *
 * @return DOMDocument
 *   The DOMDocument to perform XPath queries on.
 */
abstract protected function setup($source_config, FeedsFetcherResult $fetcher_result);
|
||||
|
||||
/**
 * Implements FeedsParser::parse().
 *
 * Loads the fetched document through setup(), runs the context query and then
 * runs every configured source query against each context node.
 *
 * @param FeedsSource $source
 *   The feed source being imported.
 * @param FeedsFetcherResult $fetcher_result
 *   The result of the fetching stage.
 *
 * @return FeedsParserResult
 *   The parser result; one item is added per context node that yielded at
 *   least one value.
 */
public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
  $source_config = $source->getConfigFor($this);

  if (empty($source_config)) {
    $source_config = $this->getConfig();
  }

  $this->doc = $this->setup($source_config, $fetcher_result);

  $parser_result = new FeedsParserResult();

  $mappings = $this->getOwnMappings();
  $this->rawXML = array_keys(array_filter($source_config['rawXML']));

  // Set link.
  $fetcher_config = $source->getConfigFor($source->importer->fetcher);
  $parser_result->link = $fetcher_config['source'];

  $this->xpath = new FeedsXPathParserDOMXPath($this->doc);
  $config = array();
  $config['debug'] = array_keys(array_filter($source_config['exp']['debug']));
  $config['errors'] = $source_config['exp']['errors'];
  $this->xpath->setConfig($config);

  $all_nodes = $this->xpath->namespacedQuery($source_config['context'], NULL, 'context');

  // namespacedQuery() returns NULL (or FALSE) instead of a node list when the
  // context query fails, so only iterate an actual DOMNodeList.
  if ($all_nodes instanceof DOMNodeList) {
    foreach ($all_nodes as $node) {
      $parsed_item = $variables = array();
      // Use a dedicated loop variable so the $source parameter is not
      // overwritten by a string key.
      foreach ($source_config['sources'] as $source_key => $query) {
        // Variable substitution.
        $query = strtr($query, $variables);
        // Parse the item.
        $result = $this->parseSourceElement($query, $node, $source_key);
        if (isset($result)) {
          // Only register a substitution variable for sources that still have
          // a mapping; otherwise a bare '$' key would be replaced in every
          // later query.
          if (isset($mappings[$source_key])) {
            $variables['$' . $mappings[$source_key]] = is_array($result) ? '' : $result;
          }
          $parsed_item[$source_key] = $result;
        }
      }
      if (!empty($parsed_item)) {
        $parser_result->items[] = $parsed_item;
      }
    }
  }

  // Release the document and XPath object, but keep the declared properties
  // in place.
  $this->doc = NULL;
  $this->xpath = NULL;
  return $parser_result;
}
|
||||
|
||||
/**
 * Runs one source query against a single context node.
 *
 * @param string $query
 *   An XPath query.
 * @param mixed $context
 *   The context node the query is evaluated against (parse() passes the
 *   items of the context query's node list).
 * @param string $source
 *   The name of the source for this query.
 *
 * @return mixed
 *   NULL when the query is empty or matched nothing, the single value when
 *   exactly one node matched, an array of values when several nodes matched,
 *   or whatever namespacedQuery() returned when that was not a node list.
 */
protected function parseSourceElement($query, $context, $source) {
  if (empty($query)) {
    return NULL;
  }

  $found = $this->xpath->namespacedQuery($query, $context, $source);

  // A value was returned directly from namespacedQuery().
  if (!($found instanceof DOMNodeList)) {
    return $found;
  }

  // Collect either the raw markup or the text value of every matched node,
  // depending on whether this source is configured to return raw XML.
  $want_raw = in_array($source, $this->rawXML);
  $values = array();
  foreach ($found as $item) {
    $values[] = $want_raw ? $this->getRaw($item) : $item->nodeValue;
  }

  switch (count($values)) {
    case 0:
      // Empty result returns NULL, that way callers can check with isset().
      return NULL;

    case 1:
      return $values[0];

    default:
      return $values;
  }
}
|
||||
|
||||
/**
 * Builds the XPath settings form for a source.
 *
 * @param array $source_config
 *   The stored configuration for the source. When empty, the parser's own
 *   configuration is used instead.
 *
 * @return array|null
 *   The form array, or NULL when overriding is disabled and the form is not
 *   being built on behalf of configForm().
 */
public function sourceForm($source_config) {
  $form = array();
  $importer = feeds_importer($this->id);
  $importer_config = $importer->getConfig();
  $mappings_ = $importer_config['processor']['config']['mappings'];

  if (empty($source_config)) {
    $source_config = $this->getConfig();
  }

  if (isset($source_config['allow_override']) &&
      !$source_config['allow_override'] &&
      empty($source_config['config'])) {
    return;
  }

  // Add extensions that might get imported.
  $allowed_extensions = isset($importer_config['fetcher']['config']['allowed_extensions']) ? $importer_config['fetcher']['config']['allowed_extensions'] : FALSE;
  if ($allowed_extensions) {
    if (strpos($allowed_extensions, 'html') === FALSE) {
      $importer->fetcher->config['allowed_extensions'] .= ' html htm';
    }
  }

  $uniques = $mappings = array();
  foreach ($mappings_ as $mapping) {
    if (strpos($mapping['source'], 'xpathparser:') === 0) {
      $mappings[$mapping['source']] = $mapping['target'];
      // The 'unique' key is not guaranteed to be present on every mapping.
      if (!empty($mapping['unique'])) {
        $uniques[] = $mapping['target'];
      }
    }
  }

  $form['xpath'] = array(
    '#type' => 'fieldset',
    '#tree' => TRUE,
    '#title' => t('XPath Parser Settings'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );

  if (empty($mappings)) {
    // Detect if Feeds menu structure has changed. feeds_ui_menu() is not
    // defined in this file and may be unavailable, so guard the call and fall
    // back to the plain base path.
    $feeds_menu = function_exists('feeds_ui_menu') ? feeds_ui_menu() : array();
    if (isset($feeds_menu['admin/structure/feeds/list'])) {
      $feeds_base = 'admin/structure/feeds/edit/';
    }
    else {
      $feeds_base = 'admin/structure/feeds/';
    }
    $form['xpath']['error_message']['#markup'] = '<div class="help">' . t('No XPath mappings are defined. Define mappings !link.', array('!link' => l(t('here'), $feeds_base . $this->id . '/mapping'))) . '</div><br />';
    return $form;
  }

  $form['xpath']['context'] = array(
    '#type' => 'textfield',
    '#title' => t('Context'),
    '#required' => TRUE,
    '#description' => t('This is the base query, all other queries will run in this context.'),
    '#default_value' => isset($source_config['context']) ? $source_config['context'] : '',
    '#maxlength' => 1024,
    '#size' => 80,
  );
  $form['xpath']['sources'] = array(
    '#type' => 'fieldset',
    '#tree' => TRUE,
  );

  if (!empty($uniques)) {
    // format_plural() translates its arguments itself, so it must receive the
    // untranslated strings plus the placeholder values, not t() output.
    $unique_list = implode(', ', array_map('check_plain', $uniques));
    $items = array(
      format_plural(count($uniques),
        'Field <strong>!column</strong> is mandatory and considered unique: only one item per !column value will be created.',
        'Fields <strong>!columns</strong> are mandatory and values in these columns are considered unique: only one entry per value in one of these columns will be created.',
        array('!column' => $unique_list, '!columns' => $unique_list)),
    );
    $form['xpath']['sources']['help']['#markup'] = '<div class="help">' . theme('item_list', array('items' => $items)) . '</div>';
  }

  $variables = array();
  foreach ($mappings as $source => $target) {
    $form['xpath']['sources'][$source] = array(
      '#type' => 'textfield',
      '#title' => check_plain($target),
      '#description' => t('The XPath query to run.'),
      '#default_value' => isset($source_config['sources'][$source]) ? $source_config['sources'][$source] : '',
      '#maxlength' => 1024,
      '#size' => 80,
    );
    if (!empty($variables)) {
      // Pass the variable names as a placeholder so the source strings stay
      // constant (and therefore translatable) and the names are escaped.
      $variable_text = format_plural(count($variables),
        'The variable @variables is available for replacement.',
        'The variables @variables are available for replacement.',
        array('@variables' => implode(', ', $variables))
      );
      $form['xpath']['sources'][$source]['#description'] .= '<br />' . $variable_text;
    }
    // Each source can reference the results of the sources defined before it.
    $variables[] = '$' . $target;
  }

  $form['xpath']['rawXML'] = array(
    '#type' => 'checkboxes',
    '#title' => t('Select the queries you would like to return raw XML or HTML'),
    '#options' => $mappings,
    '#default_value' => isset($source_config['rawXML']) ? $source_config['rawXML'] : array(),
  );
  $form['xpath']['exp'] = array(
    '#type' => 'fieldset',
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
    '#tree' => TRUE,
    '#title' => t('Debug Options'),
  );
  $form['xpath']['exp']['errors'] = array(
    '#type' => 'checkbox',
    '#title' => t('Show error messages.'),
    '#default_value' => isset($source_config['exp']['errors']) ? $source_config['exp']['errors'] : FALSE,
  );
  if (extension_loaded('tidy')) {
    $form['xpath']['exp']['tidy'] = array(
      '#type' => 'checkbox',
      '#title' => t('Use Tidy'),
      '#description' => t("The Tidy PHP extension has been detected.\nSelect this to clean the markup before parsing."),
      '#default_value' => isset($source_config['exp']['tidy']) ? $source_config['exp']['tidy'] : FALSE,
    );
    $form['xpath']['exp']['tidy_encoding'] = array(
      '#type' => 'textfield',
      '#title' => t('Tidy encoding'),
      '#description' => t('Set the encoding for tidy. See the !phpdocs for possible values.', array('!phpdocs' => l(t('PHP docs'), 'http://www.php.net/manual/en/tidy.parsestring.php/'))),
      '#default_value' => isset($source_config['exp']['tidy_encoding']) ? $source_config['exp']['tidy_encoding'] : 'UTF8',
      // Only show the encoding field while "Use Tidy" is checked.
      '#states' => array(
        'visible' => array(
          ':input[name$="[tidy]"]' => array(
            'checked' => TRUE,
          ),
        ),
      ),
    );
  }
  $form['xpath']['exp']['debug'] = array(
    '#type' => 'checkboxes',
    '#title' => t('Debug query'),
    '#options' => array_merge(array('context' => 'context'), $mappings),
    '#default_value' => isset($source_config['exp']['debug']) ? $source_config['exp']['debug'] : array(),
  );
  return $form;
}
|
||||
|
||||
/**
 * Overrides parent::configForm().
 *
 * Reuses sourceForm() for the importer-level settings, expands the fieldset,
 * makes the context optional and adds the "allow override" checkbox.
 */
public function configForm(&$form_state) {
  $settings = $this->getConfig();
  // Flag the call so sourceForm() builds the form even when overriding is
  // disabled.
  $settings['config'] = TRUE;

  $override_checkbox = array(
    '#type' => 'checkbox',
    '#title' => t('Allow source configuration override'),
    '#description' => t('This setting allows feed nodes to specify their own XPath values for the context and sources.'),
    '#default_value' => $settings['allow_override'],
  );

  $form = $this->sourceForm($settings);
  $form['xpath']['context']['#required'] = FALSE;
  $form['xpath']['#collapsed'] = FALSE;
  $form['xpath']['allow_override'] = $override_checkbox;

  return $form;
}
|
||||
|
||||
/**
 * Defines the per-source defaults.
 *
 * @return array
 *   An empty array; this method defines no source-level defaults.
 */
public function sourceDefaults() {
  $defaults = array();
  return $defaults;
}
|
||||
|
||||
/**
 * Defines the importer-level configuration defaults.
 *
 * @return array
 *   The default values for sources, rawXML, context, the "exp" debug options
 *   and allow_override.
 */
public function configDefaults() {
  // Debug options, in the same key order as before.
  $debug_options = array(
    'errors' => FALSE,
    'tidy' => FALSE,
    'debug' => array(),
    'tidy_encoding' => 'UTF8',
  );

  $defaults = array();
  $defaults['sources'] = array();
  $defaults['rawXML'] = array();
  $defaults['context'] = '';
  $defaults['exp'] = $debug_options;
  $defaults['allow_override'] = TRUE;

  return $defaults;
}
|
||||
|
||||
/**
 * Overrides parent::sourceFormValidate().
 *
 * If the values of this source are the same as the base config we set them to
 * blank so that the values will be inherited from the importer defaults.
 *
 * @param &$values
 *   The values from the form to validate, passed by reference. On return
 *   this is either an empty array or the contents of the 'xpath' element.
 */
public function sourceFormValidate(&$values) {
  $config = $this->getConfig();
  $allow_override = $config['allow_override'];
  unset($config['allow_override']);

  // Nothing to keep when overriding is disabled, or when the submission does
  // not carry an 'xpath' element at all.
  if (!$allow_override || !isset($values['xpath']) || !is_array($values['xpath'])) {
    $values = array();
    return;
  }

  $values = $values['xpath'];

  // Sort both arrays by key so the strict comparison does not depend on the
  // order of the top-level keys.
  ksort($values);
  ksort($config);
  if ($values === $config) {
    $values = array();
    return;
  }

  $this->configFormValidate($values);
}
|
||||
|
||||
/**
 * Overrides parent::configFormValidate().
 *
 * Trims the context and source queries and checks each non-empty one for
 * XPath syntax errors by running it against a minimal sample document.
 *
 * @param &$values
 *   The submitted values, passed by reference. When they are wrapped in an
 *   'xpath' element (the config form), they are unwrapped in place.
 */
public function configFormValidate(&$values) {
  $mappings = $this->getOwnMappings();

  // This tests if we're validating configForm or sourceForm.
  $config_form = FALSE;
  if (isset($values['xpath'])) {
    $values = $values['xpath'];
    $config_form = TRUE;
  }
  $class = get_class($this);
  $xml = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?>' . "\n<items></items>");
  $use_errors = libxml_use_internal_errors(TRUE);

  $context = isset($values['context']) ? trim($values['context']) : '';
  $values['context'] = $context;
  if (!empty($context)) {
    $error = $this->xpathSyntaxError($xml, $context);
    // Error code 1219 is undefined namespace prefix.
    // Our sample doc doesn't have any namespaces let alone the one they're
    // trying to use. Besides, if someone is trying to use a namespace in an
    // XPath query, they're probably right.
    if ($error && $error->code != 1219) {
      $element = $config_form ? 'xpath][context' : 'feeds][' . $class . '][xpath][context';
      form_set_error($element, t('There was an error with the XPath selector: %error', array('%error' => $error->message)));
    }
  }

  $sources = (isset($values['sources']) && is_array($values['sources'])) ? $values['sources'] : array();
  // Iterate by value and write the trimmed query back explicitly, so no
  // reference into $values is left dangling after the loop.
  foreach ($sources as $key => $query) {
    $query = trim($query);
    $values['sources'][$key] = $query;
    if (empty($query)) {
      continue;
    }
    $error = $this->xpathSyntaxError($xml, $query);
    if (!$error || $error->code == 1219) {
      continue;
    }
    // Our variable substitution options can cause syntax errors (code 1207),
    // check if we're doing that.
    $variable_present = FALSE;
    if ($error->code == 1207) {
      foreach ($mappings as $target) {
        if (strpos($query, '$' . $target) !== FALSE) {
          $variable_present = TRUE;
          break;
        }
      }
    }
    if (!$variable_present) {
      $element = $config_form ? 'xpath][sources][' . $key : 'feeds][' . $class . '][xpath][sources][' . $key;
      form_set_error($element, t('There was an error with the XPath selector: %error', array('%error' => $error->message)));
    }
  }

  libxml_use_internal_errors($use_errors);
}

/**
 * Runs a query against the sample document and reports the libxml error.
 *
 * The libxml error state is cleared before and after the query, so the
 * returned error can only belong to this query and never to an earlier one.
 *
 * @param SimpleXMLElement $xml
 *   The sample document to run the query against.
 * @param string $query
 *   The XPath query to check.
 *
 * @return LibXMLError|false
 *   The last libxml error raised by the query, or FALSE if there was none.
 */
protected function xpathSyntaxError(SimpleXMLElement $xml, $query) {
  libxml_clear_errors();
  $xml->xpath($query);
  $error = libxml_get_last_error();
  libxml_clear_errors();
  return $error;
}
|
||||
|
||||
/**
 * Overrides parent::getMappingSources().
 *
 * Offers one new "xpathparser:N" source, where N is one higher than the
 * highest index used by the existing XPath mappings.
 *
 * @return array
 *   The new XPath source definition followed by the parent's sources.
 */
public function getMappingSources() {
  $mappings = $this->filterMappings(feeds_importer($this->id)->processor->config['mappings']);
  $next = 0;
  // The last mapping is not guaranteed to carry the highest index (e.g. if
  // mappings are ever re-ordered), so scan all of them for the maximum to
  // avoid handing out an index that is already in use.
  foreach (array_keys($mappings) as $key) {
    $parts = explode(':', $key);
    if (isset($parts[1]) && is_numeric($parts[1])) {
      $next = max($next, (int) $parts[1] + 1);
    }
  }
  return array(
    'xpathparser:' . $next => array(
      'name' => t('XPath Expression'),
      'description' => t('Allows you to configure an XPath expression that will populate this field.'),
    ),
  ) + parent::getMappingSources();
}
|
||||
|
||||
/**
 * Returns the importer's mappings whose source begins with "xpathparser:".
 *
 * @return array
 *   An array of mappings keyed source => target.
 */
protected function getOwnMappings() {
  $importer = feeds_importer($this->id);
  $settings = $importer->getConfig();
  $all_mappings = $settings['processor']['config']['mappings'];
  return $this->filterMappings($all_mappings);
}
|
||||
|
||||
/**
 * Reduces a processor's mapping list to the XPath parser's own mappings.
 *
 * @param array $mappings
 *   A mapping array from a processor.
 *
 * @return array
 *   An array of mappings keyed source => target, containing only the entries
 *   whose source starts with "xpathparser:".
 */
protected function filterMappings($mappings) {
  $ours = array();
  foreach ($mappings as $candidate) {
    $name = $candidate['source'];
    // Skip anything that does not start with our prefix.
    if (strpos($name, 'xpathparser:') !== 0) {
      continue;
    }
    $ours[$name] = $candidate['target'];
  }
  return $ours;
}
|
||||
|
||||
/**
 * Switches libxml to internal error handling.
 *
 * @return bool
 *   The previous value of use_errors, to be handed back to errorStop().
 */
protected function errorStart() {
  $previous = libxml_use_internal_errors(TRUE);
  return $previous;
}
|
||||
|
||||
/**
 * Stops custom error handling.
 *
 * Optionally reports the collected libxml errors as Drupal messages, then
 * clears them and restores the previous use_errors setting.
 *
 * @param bool $use
 *   The previous value of use_errors.
 * @param bool $print
 *   (Optional) Whether to print errors to the screen. Defaults to TRUE.
 */
protected function errorStop($use, $print = TRUE) {
  if ($print) {
    foreach (libxml_get_errors() as $error) {
      // Start from a defined type on every iteration so that a level not
      // listed below neither leaves $type undefined nor inherits the type of
      // the previous error.
      $type = 'warning';
      switch ($error->level) {
        case LIBXML_ERR_WARNING:
        case LIBXML_ERR_ERROR:
          $type = 'warning';
          break;

        case LIBXML_ERR_FATAL:
          $type = 'error';
          break;
      }
      $message = t('%error on line %num. Error code: %code', array('%error' => trim($error->message), '%num' => $error->line, '%code' => $error->code));
      drupal_set_message($message, $type, FALSE);
    }
  }
  libxml_clear_errors();
  libxml_use_internal_errors($use);
}
|
||||
|
||||
/**
 * Serializes a node to its raw markup.
 *
 * Used by parseSourceElement() for sources configured to return raw XML.
 *
 * @param DOMNode $node
 *   The node to serialize.
 */
abstract protected function getRaw(DOMNode $node);
|
||||
}
|
144
sites/all/modules/feeds_xpathparser/FeedsXPathParserDOMXPath.inc
Normal file
144
sites/all/modules/feeds_xpathparser/FeedsXPathParserDOMXPath.inc
Normal file
@@ -0,0 +1,144 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Provides a custom version of DOMXPath for use with feeds_xpathparser.
|
||||
*/
|
||||
class FeedsXPathParserDOMXPath extends DOMXPath {

  /**
   * The configuration handed in through setConfig() ('debug' and 'errors').
   */
  protected $config = array();

  /**
   * Cache of namespace-rewritten queries, keyed by the original query.
   */
  protected $modifiedQueries = array();

  /**
   * Namespaces found in the document, keyed by prefix.
   *
   * Declared public because it used to be created dynamically.
   */
  public $namespaces = array();

  /**
   * The document being queried.
   *
   * Declared public because it used to be created dynamically.
   */
  public $doc = NULL;

  /**
   * The libxml error raised by the most recent query, if any.
   *
   * Declared public because it used to be created dynamically.
   */
  public $error = NULL;

  /**
   * Constructs the XPath object and collects the document's namespaces.
   *
   * @param DOMDocument $doc
   *   The document to run queries on.
   */
  public function __construct(DOMDocument $doc) {
    $this->namespaces = array();
    $simple = simplexml_import_dom($doc);
    // An empty DOMDocument cannot be imported; only read the namespaces when
    // the import actually produced an element.
    if ($simple instanceof SimpleXMLElement) {
      $this->namespaces = $simple->getNamespaces(TRUE);
    }
    $this->doc = $doc;
    parent::__construct($doc);
  }

  /**
   * Sets the debug/error reporting configuration.
   *
   * @param array $config
   *   An array with the keys 'debug' (list of source names to debug) and
   *   'errors' (whether to display query errors).
   */
  public function setConfig(array $config) {
    $this->config = $config;
  }

  /**
   * Displays the result of a query as a Drupal message.
   *
   * @param mixed $data
   *   The query result: a DOMNodeList or a scalar value.
   * @param string $source
   *   The name of the source the query belongs to.
   */
  protected function debug($data, $source) {
    $output = "$source : <ul>";
    if ($data instanceof DOMNodeList) {
      foreach ($data as $node) {
        $output .= '<li>' . check_plain($this->doc->saveXML($node)) . '</li>';
      }
    }
    else {
      $output .= '<li>' . check_plain($data) . '</li>';
    }
    $output .= '</ul>';
    drupal_set_message($output);
  }

  /**
   * Executes an XPath query with namespace support.
   *
   * @param string $query
   *   An XPath query.
   * @param mixed $context
   *   The context node to evaluate the query against, or NULL to query the
   *   whole document.
   * @param string $source
   *   The name of the source the query belongs to; used to decide whether the
   *   result is shown as debug output.
   *
   * @return mixed
   *   The result of the query, or NULL if libxml reported an error for it.
   */
  public function namespacedQuery($query, $context, $source) {
    $this->addDefaultNamespace($query);
    $results = $this->_query($query, $context);

    // The configuration is optional; treat missing keys as "off".
    $debug = (isset($this->config['debug']) && is_array($this->config['debug'])) ? $this->config['debug'] : array();
    if (in_array($source, $debug)) {
      $this->debug($results, $source);
    }

    $level = is_object($this->error) ? $this->error->level : NULL;

    if (!empty($this->config['errors']) && ($level === LIBXML_ERR_ERROR || $level === LIBXML_ERR_WARNING)) {
      // Errors and warnings share the same text and differ only in the
      // message type.
      drupal_set_message(
        t("There was an error during the XPath query: %query.<br />\nLibxml returned the message: %message, with the error code: %code.",
          array(
            '%query' => $query,
            '%message' => trim($this->error->message),
            '%code' => $this->error->code,
          )),
        $level === LIBXML_ERR_ERROR ? 'error' : 'warning',
        FALSE);
    }

    // DOMXPath::evaluate() and DOMXPath::query() will return FALSE on error or
    // if the value is false. We check error result and return NULL in case
    // of error.
    if ($level === LIBXML_ERR_ERROR) {
      return NULL;
    }

    return $results;
  }

  /**
   * Normalizes XPath queries, adding the default namespace.
   *
   * Registers every namespace of the document. If the document has a default
   * (prefix-less) namespace, it is registered as "__default__" and the query
   * is rewritten to use that prefix.
   *
   * @param string $query
   *   An XPath query string, modified in place.
   */
  protected function addDefaultNamespace(&$query) {
    foreach ($this->namespaces as $prefix => $namespace) {
      if ($prefix === '') {
        $this->registerNamespace('__default__', $namespace);

        // Replace all the elements without prefix by the default prefix.
        if (!isset($this->modifiedQueries[$query])) {
          $parser = new FeedsXPathParserQueryParser($query);
          $modQuery = $parser->getQuery();
          $this->modifiedQueries[$query] = $modQuery;
          $query = $modQuery;
        }
        else {
          $query = $this->modifiedQueries[$query];
        }
      }
      else {
        $this->registerNamespace($prefix, $namespace);
      }
    }
  }

  /**
   * Runs the query and records the libxml error it raised, if any.
   *
   * Here we set libxml_use_internal_errors to TRUE because depending on the
   * libxml version, $xml->xpath() might return FALSE or an empty array() when
   * a query doesn't match.
   *
   * @param string $query
   *   The XPath query.
   * @param mixed $context
   *   (Optional) The context node. When given, the query is evaluated with
   *   DOMXPath::evaluate(), otherwise with DOMXPath::query().
   *
   * @return mixed
   *   The raw result of the query.
   */
  protected function _query($query, $context = NULL) {
    $use_errors = libxml_use_internal_errors(TRUE);
    // Drop any error left behind by earlier libxml calls so it cannot be
    // mistaken for an error of this query.
    libxml_clear_errors();

    // Perform XPath query.
    // So, grrr. FALSE is returned when there is an error. However, FALSE is
    // also a valid return value from DOMXPath::evaluate(). Ex: '1 = 2'
    if ($context) {
      $results = $this->evaluate($query, $context);
    }
    else {
      $results = $this->query($query);
    }

    $this->error = libxml_get_last_error();
    libxml_clear_errors();
    libxml_use_internal_errors($use_errors);
    return $results;
  }

}
|
51
sites/all/modules/feeds_xpathparser/FeedsXPathParserHTML.inc
Normal file
51
sites/all/modules/feeds_xpathparser/FeedsXPathParserHTML.inc
Normal file
@@ -0,0 +1,51 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Provides the FeedsXPathParserHTML class.
|
||||
*/
|
||||
class FeedsXPathParserHTML extends FeedsXPathParserBase {

  /**
   * Implements FeedsXPathParserBase::setup().
   *
   * Optionally cleans the fetched markup with Tidy, then loads it as HTML.
   *
   * @param array $source_config
   *   The configuration for the source; the 'exp' options control Tidy and
   *   error display.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result holding the raw document.
   *
   * @return DOMDocument
   *   The loaded HTML document.
   *
   * @throws Exception
   *   If there is nothing to parse or the document cannot be loaded.
   */
  protected function setup($source_config, FeedsFetcherResult $fetcher_result) {
    if (!empty($source_config['exp']['tidy'])) {
      $config = array(
        'merge-divs' => FALSE,
        'merge-spans' => FALSE,
        'join-styles' => FALSE,
        'drop-empty-paras' => FALSE,
        'wrap' => 0,
        'tidy-mark' => FALSE,
        'escape-cdata' => TRUE,
        'word-2000' => TRUE,
      );
      // Default tidy encoding is UTF8.
      $encoding = isset($source_config['exp']['tidy_encoding']) ? $source_config['exp']['tidy_encoding'] : 'UTF8';
      $raw = tidy_repair_string(trim($fetcher_result->getRaw()), $config, $encoding);
    }
    else {
      $raw = $fetcher_result->getRaw();
    }

    // Fail with the regular exception instead of handing an empty string, or
    // a failed Tidy result, to DOMDocument::loadHTML().
    if (!is_string($raw) || $raw === '') {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }

    $doc = new DOMDocument();
    // Use our own error handling.
    $use = $this->errorStart();
    $success = $doc->loadHTML($raw);
    unset($raw);
    $this->errorStop($use, !empty($source_config['exp']['errors']));
    if (!$success) {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }
    return $doc;
  }

  /**
   * Implements FeedsXPathParserBase::getRaw().
   *
   * @param DOMNode $node
   *   The node to serialize.
   *
   * @return string
   *   The markup of the node.
   */
  protected function getRaw(DOMNode $node) {
    // DOMDocument::saveHTML() cannot take $node as an argument prior to 5.3.6.
    if (version_compare(phpversion(), '5.3.6', '>=')) {
      return $this->doc->saveHTML($node);
    }
    return $this->doc->saveXML($node);
  }

}
|
@@ -0,0 +1,120 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Pseudo-parser of XPath queries. When an XML document has a default
|
||||
* namespace this gets called to add the __default__ namespace where
|
||||
* appropriate. Aren't we nice?
|
||||
*
|
||||
* @todo
|
||||
* Cleanup.
|
||||
* @param $query
|
||||
* An XPath query string.
|
||||
* @return string
|
||||
* An XPath query string with the __default__ namespace added.
|
||||
*/
|
||||
class FeedsXPathParserQueryParser {

  /**
   * The query being rewritten, after whitespace before "(" was removed.
   */
  public $query = '';

  /**
   * Characters that end the current word.
   */
  public $word_boundaries = array();

  /**
   * Whether the scanner is currently inside a quoted string.
   */
  public $in_quotes = FALSE;

  /**
   * The quote character that opened the current quoted string.
   */
  public $quote_char = '';

  /**
   * The word collected so far.
   */
  public $word = '';

  /**
   * The rewritten query built so far.
   */
  public $output = '';

  /**
   * The most recent boundary character.
   */
  public $prev_boundary = '';

  /**
   * The most recent axis name (the word in front of "::").
   */
  public $axis = '';

  /**
   * Whether the next word follows a namespace prefix and must stay as it is.
   */
  public $skip_next_word = FALSE;

  /**
   * Byte offset of the character currently being scanned.
   */
  public $i = 0;

  /**
   * Parses the query immediately; fetch the result with getQuery().
   *
   * @param string $query
   *   An XPath query string.
   */
  public function __construct($query) {
    $this->query = (string) preg_replace('/\s+\(\s*/', '(', $query);

    $this->word_boundaries = array(
      '[', ']', '=', '(', ')', '.', '<', '>', '*', '!', '|', '/', ',', ' ', ':',
    );
    $this->in_quotes = FALSE;
    $this->quote_char = '';
    $this->word = '';
    $this->output = '';
    $this->prev_boundary = '';
    $this->axis = '';
    $this->skip_next_word = FALSE;
    $this->i = 0;
    $this->start();
  }

  /**
   * Scans the query character by character and builds the output.
   */
  public function start() {
    // The query is indexed byte by byte below, so the loop bound has to be
    // the byte length. A character count would cut the query short as soon
    // as it contains a multibyte character.
    $length = strlen($this->query);
    for ($i = 0; $i < $length; $i++) {
      $this->i = $i;
      $c = $this->query[$i];

      if ($c == '"' || $c == "'") {
        $this->handle_quote($c);
        continue;
      }
      if ($this->in_quotes) {
        $this->word .= $c;
        continue;
      }

      if (in_array($c, $this->word_boundaries, TRUE)) {
        $this->handle_word_boundary($c);
      }
      else {
        $this->word .= $c;
      }
    }
    // Flush the word that is still pending at the end of the query.
    $this->handle_word();
  }

  /**
   * Handles a quote character: opens, closes or continues a quoted string.
   *
   * @param string $c
   *   The quote character that was read.
   */
  public function handle_quote($c) {
    if ($this->in_quotes && $c == $this->quote_char) {
      // Closing quote: the quoted string is copied to the output untouched.
      $this->in_quotes = FALSE;
      $this->word .= $c;
      $this->output .= $this->word;
      $this->word = '';
    }
    elseif (!$this->in_quotes) {
      // Opening quote: flush the pending word first.
      $this->in_quotes = TRUE;
      $this->handle_word();
      $this->word = $c;
      $this->quote_char = $c;
    }
    else {
      // The other kind of quote inside a quoted string.
      $this->word .= $c;
    }
  }

  /**
   * Handles a boundary character that ends the current word.
   *
   * @param string $c
   *   The boundary character that was read.
   */
  public function handle_word_boundary($c) {
    // Operators surrounded by spaces are copied as they are.
    if (in_array($this->word, array('div', 'or', 'and', 'mod'), TRUE) &&
        $this->prev_boundary == ' ' && $c == ' ') {
      $this->output .= $this->word;
    }
    else {
      $this->handle_word($c);
    }
    $this->output .= $c;
    $this->word = '';
    $this->prev_boundary = $c;
  }

  /**
   * Writes the current word to the output, prefixed where appropriate.
   *
   * @param string $c
   *   (Optional) The boundary character that ended the word. Empty at the end
   *   of the query.
   */
  public function handle_word($c = '') {
    if ($this->word == '') {
      return;
    }

    if ($c == ':') {
      // Look at the neighbouring characters without reading past the end of
      // the query.
      $next = isset($this->query[$this->i + 1]) ? $this->query[$this->i + 1] : '';
      $prev = $this->i > 0 ? $this->query[$this->i - 1] : '';

      // "word::" is an axis specifier.
      if ($next == ':') {
        $this->axis = $this->word;
      }
      // A single colon: the word is a namespace prefix, so the word that
      // follows is already qualified.
      if ($prev != ':' && $next != ':') {
        $this->output .= $this->word;
        $this->skip_next_word = TRUE;
        return;
      }
    }

    if ($this->skip_next_word) {
      $this->skip_next_word = FALSE;
      $this->output .= $this->word;
      return;
    }

    // Numbers, attributes, function names and axis names are not prefixed.
    if (is_numeric($this->word) ||
        $this->axis == 'attribute' ||
        strpos($this->word, '@') === 0 ||
        $c == '(' ||
        $c == ':') {
      $this->output .= $this->word;
      return;
    }

    $this->output .= '__default__:' . $this->word;
  }

  /**
   * Returns the rewritten query.
   *
   * @return string
   *   An XPath query string with the __default__ namespace added.
   */
  public function getQuery() {
    return $this->output;
  }

}
|
41
sites/all/modules/feeds_xpathparser/FeedsXPathParserXML.inc
Normal file
41
sites/all/modules/feeds_xpathparser/FeedsXPathParserXML.inc
Normal file
@@ -0,0 +1,41 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Provides the FeedsXPathParserXML class.
|
||||
*/
|
||||
class FeedsXPathParserXML extends FeedsXPathParserBase {

  /**
   * Implements FeedsXPathParserBase::setup().
   *
   * Optionally cleans the fetched markup with Tidy, then loads it as XML.
   *
   * @param array $source_config
   *   The configuration for the source; the 'exp' options control Tidy and
   *   error display.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result holding the raw document.
   *
   * @return DOMDocument
   *   The loaded XML document.
   *
   * @throws Exception
   *   If there is nothing to parse or the document cannot be loaded.
   */
  protected function setup($source_config, FeedsFetcherResult $fetcher_result) {
    if (!empty($source_config['exp']['tidy'])) {
      $config = array(
        'input-xml' => TRUE,
        'wrap' => 0,
        'tidy-mark' => FALSE,
      );
      // Default tidy encoding is UTF8.
      $encoding = isset($source_config['exp']['tidy_encoding']) ? $source_config['exp']['tidy_encoding'] : 'UTF8';
      $raw = tidy_repair_string(trim($fetcher_result->getRaw()), $config, $encoding);
    }
    else {
      $raw = $fetcher_result->getRaw();
    }

    // Fail with the regular exception instead of handing an empty string, or
    // a failed Tidy result, to DOMDocument::loadXML().
    if (!is_string($raw) || $raw === '') {
      throw new Exception(t('There was an error parsing the XML document.'));
    }

    $doc = new DOMDocument();
    // Use our own error handling.
    $use = $this->errorStart();
    $success = $doc->loadXML($raw);
    unset($raw);
    $this->errorStop($use, !empty($source_config['exp']['errors']));
    if (!$success) {
      throw new Exception(t('There was an error parsing the XML document.'));
    }
    return $doc;
  }

  /**
   * Implements FeedsXPathParserBase::getRaw().
   *
   * @param DOMNode $node
   *   The node to serialize.
   *
   * @return string
   *   The XML markup of the node.
   */
  protected function getRaw(DOMNode $node) {
    return $this->doc->saveXML($node);
  }

}
|
339
sites/all/modules/feeds_xpathparser/LICENSE.txt
Normal file
339
sites/all/modules/feeds_xpathparser/LICENSE.txt
Normal file
@@ -0,0 +1,339 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Lesser General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License.
|
21
sites/all/modules/feeds_xpathparser/feeds_xpathparser.info
Normal file
21
sites/all/modules/feeds_xpathparser/feeds_xpathparser.info
Normal file
@@ -0,0 +1,21 @@
|
||||
name = Feeds XPath Parser
|
||||
description = Parse an XML or HTML document using XPath.
|
||||
package = Feeds
|
||||
dependencies[] = feeds
|
||||
files[] = FeedsXPathParserBase.inc
|
||||
files[] = FeedsXPathParserHTML.inc
|
||||
files[] = FeedsXPathParserXML.inc
|
||||
files[] = FeedsXPathParserQueryParser.inc
|
||||
files[] = FeedsXPathParserDOMXPath.inc
|
||||
files[] = tests/feeds_xpathparser.test
|
||||
files[] = tests/feeds_xpathparser_parser_html.test
|
||||
files[] = tests/feeds_xpathparser_parser_xml.test
|
||||
files[] = tests/feeds_xpathparser_query_parser.test
|
||||
core = 7.x
|
||||
|
||||
; Information added by drupal.org packaging script on 2012-04-25
|
||||
version = "7.x-1.0-beta3+7-dev"
|
||||
core = "7.x"
|
||||
project = "feeds_xpathparser"
|
||||
datestamp = "1335312969"
|
||||
|
@@ -0,0 +1,11 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* hook_update_N() implementations for Feeds XPath Parser.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Update to trigger cache refresh.
|
||||
*/
|
||||
function feeds_xpathparser_update_7101() {
  // Intentionally empty: per the docblock, this update exists only so that
  // running it triggers a cache refresh.
}
|
27
sites/all/modules/feeds_xpathparser/feeds_xpathparser.module
Normal file
27
sites/all/modules/feeds_xpathparser/feeds_xpathparser.module
Normal file
@@ -0,0 +1,27 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Implements hook_feeds_plugins().
|
||||
*/
|
||||
function feeds_xpathparser_feeds_plugins() {
  $plugins = array();

  // Parser plugin for HTML documents.
  $plugins['FeedsXPathParserHTML'] = array(
    'name' => t('XPath HTML parser'),
    'description' => t('Parse HTML using XPath.'),
    'handler' => array(
      'parent' => 'FeedsParser',
      'class' => 'FeedsXPathParserHTML',
      'file' => 'FeedsXPathParserHTML.inc',
    ),
  );

  // Parser plugin for XML documents.
  $plugins['FeedsXPathParserXML'] = array(
    'name' => t('XPath XML parser'),
    'description' => t('Parse XML using XPath.'),
    'handler' => array(
      'parent' => 'FeedsParser',
      'class' => 'FeedsXPathParserXML',
      'file' => 'FeedsXPathParserXML.inc',
    ),
  );

  return $plugins;
}
|
@@ -0,0 +1,46 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Tests for FeedsXPathParserXML.inc.
|
||||
*/
|
||||
|
||||
/**
 * Base web test case shared by the Feeds XPath Parser tests.
 */
class FeedsXPathParserWebTestCase extends FeedsWebTestCase {

  /**
   * Base path of the Feeds importer administration pages.
   *
   * Assigned in setUp(). Declared explicitly because creating it as a dynamic
   * property is deprecated as of PHP 8.2; kept public so that its visibility
   * is the same as the dynamic property it replaces.
   *
   * @var string
   */
  public $feeds_base;

  /**
   * Set up test.
   */
  public function setUp() {
    parent::setUp('feeds_xpathparser');

    // Set the front page to show 30 nodes so we can easily see what is aggregated.
    $edit = array('default_nodes_main' => 30);
    $this->drupalPost('admin/config/system/site-information', $edit, 'Save configuration');

    // Set the teaser length display to unlimited otherwise tests looking for
    // text on nodes will fail.
    $edit = array('fields[body][type]' => 'text_default');
    $this->drupalPost('admin/structure/types/manage/article/display/teaser', $edit, 'Save');

    // Generalize across my version of feeds and the standard one.
    $items = feeds_ui_menu();
    if (isset($items['admin/structure/feeds/%feeds_importer/edit'])) {
      $this->feeds_base = 'admin/structure/feeds';
    }
    else {
      $this->feeds_base = 'admin/structure/feeds/edit';
    }
  }

  /**
   * Submits a form and verifies that the submitted values were stored.
   *
   * Posts $edit to $url, asserts that $saved_text is shown, then reloads $url
   * and asserts that every submitted field holds the submitted value.
   *
   * @param string $url
   *   The path of the form to submit and to reload afterwards.
   * @param array $edit
   *   Field values to submit, keyed by field name.
   * @param string $button
   *   The label of the submit button to press.
   * @param string $saved_text
   *   The text expected on the page after submitting.
   */
  public function postAndCheck($url, $edit, $button, $saved_text) {
    $this->drupalPost($url, $edit, $button);
    $this->assertText($saved_text);
    $this->drupalGet($url);
    foreach ($edit as $key => $value) {
      $this->assertFieldByName($key, $value);
    }
  }

}
|
File diff suppressed because one or more lines are too long
@@ -0,0 +1,40 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>albino</title>
|
||||
<link albino="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<title>bear</title>
|
||||
<link bear="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
<!-- Empty entry here intentionally. See #1189192.-->
|
||||
<entry>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<title>cook</title>
|
||||
<link cook="http://example.org/2003/12/13/atom04"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6b</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<title>physics</title>
|
||||
<link physics="http://example.org/2003/12/13/atom05"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6c</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>
|
@@ -0,0 +1,40 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Atom Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<title>Atom-Powered Robots Run Amok</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
<!-- Empty entry here intentionally. See #1189192.-->
|
||||
<entry>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<title>My dog Jack is the best.</title>
|
||||
<link href="http://example.org/2003/12/13/atom04"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6b</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<title>Physics is cool.</title>
|
||||
<link href="http://example.org/2003/12/13/atom05"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6c</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>
|
@@ -0,0 +1,36 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Atom Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<title>Atom-Powered Robots Run Amok</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<title>My dog Jack is the best.</title>
|
||||
<link href="http://example.org/2003/12/13/atom04"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6b</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<title>Physics is really cool.</title>
|
||||
<link href="http://example.org/2003/12/13/atom05"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6c</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>
|
@@ -0,0 +1,7 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Test</title>
|
||||
<style type="text/css">bla bla</style>
|
||||
</head>
|
||||
<body>bla bla</body>
|
||||
</html>
|
@@ -0,0 +1,213 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Tests for FeedsXPathParserHTML.inc.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test single feeds.
|
||||
*/
|
||||
class FeedsXPathParseHTMLTestCase extends FeedsXPathParserWebTestCase {
|
||||
|
||||
/**
|
||||
* Describe this test.
|
||||
*/
|
||||
public static function getInfo() {
  // Name, description and group reported for this test case.
  $info = array();
  $info['name'] = 'HTML Parser';
  $info['description'] = 'Regression tests for Feeds XPath HTML parser.';
  $info['group'] = 'Feeds XPath Parser';
  return $info;
}
|
||||
|
||||
/**
|
||||
* Run tests.
|
||||
*/
|
||||
public function test() {
|
||||
$this->createImporterConfiguration('XPath', 'xpath');
|
||||
|
||||
$this->setPlugin('xpath', 'FeedsXPathParserHTML');
|
||||
$this->addMappings('xpath',
|
||||
array(
|
||||
array(
|
||||
'source' => 'xpathparser:0',
|
||||
'target' => 'title',
|
||||
'unique' => FALSE,
|
||||
),
|
||||
array(
|
||||
'source' => 'xpathparser:1',
|
||||
'target' => 'url',
|
||||
'unique' => TRUE,
|
||||
),
|
||||
)
|
||||
);
|
||||
// Set importer default settings.
|
||||
$importer_url = $this->feeds_base . '/xpath/settings/FeedsXPathParserHTML';
|
||||
$edit = array(
|
||||
'xpath[context]' => '//tr[starts-with(@class, "odd ") or starts-with(@class, "even ")]',
|
||||
'xpath[sources][xpathparser:0]' => 'td[1]/a',
|
||||
'xpath[sources][xpathparser:1]' => 'td[1]/a/@href',
|
||||
);
|
||||
$this->postAndCheck($importer_url, $edit, t('Save'), t('Your changes have been saved.'));
|
||||
|
||||
// Test import.
|
||||
$path = $GLOBALS['base_url'] . '/' . drupal_get_path('module', 'feeds_xpathparser') . '/tests/feeds_xpathparser/';
|
||||
$nid = $this->createFeedNode('xpath', $path . 'issues_drupal.org.htm', 'Testing XPath HTML Parser');
|
||||
$feed_node_edit_url = 'node/' . $nid . '/edit';
|
||||
$this->assertText(t('Created 29 nodes'));
|
||||
|
||||
// Import again, this verifies url field was mapped correctly.
|
||||
$this->drupalPost('node/' . $nid . '/import', array(), t('Import'));
|
||||
$this->assertText(t('There are no new nodes'));
|
||||
|
||||
// Assert accuracy of aggregated content. I find humor in using our own
|
||||
// issue queue to run tests against.
|
||||
$this->drupalGet('node');
|
||||
$this->assertText('Xpath Functions');
|
||||
$this->assertText('Unable to upload .html files');
|
||||
$this->assertText('Import to multiple content types');
|
||||
$this->assertText('Parser includes tags in mapped output');
|
||||
$this->assertText('Errors');
|
||||
$this->assertText('Loop through HTML - all data is in one node?');
|
||||
$this->assertText('Patch: add encoding options for PHP tidy feature');
|
||||
$this->assertText('Import and Maintain 1300+ Node Items');
|
||||
$this->assertText('Documentation update');
|
||||
$this->assertText('An HTTP error 404 occured');
|
||||
$this->assertText('Does it work with Feeds Image Grabber');
|
||||
$this->assertText('Node published date not being correctly mapped (set to 1 Jan 1970)');
|
||||
$this->assertText('fields to fill xpath not displayed in importer interface except for "body"');
|
||||
$this->assertText('parsing link field');
|
||||
$this->assertText('Error when switching to XML Parser');
|
||||
$this->assertText('Duplicate content even if "unique target" is set');
|
||||
$this->assertText('Labels/field names become meaningless with Data Processor');
|
||||
$this->assertText('Xpath namespace help');
|
||||
$this->assertText('warning: mysql_real_escape_string()');
|
||||
$this->assertText('Feeds XPath Parser: warning: Invalid argument');
|
||||
$this->assertText('What am I missing? FeedsXPathParser: No mappings are defined.');
|
||||
$this->assertText('CDATA in tag not producing text');
|
||||
$this->assertText('Cant map empty fields');
|
||||
$this->assertText('Support literal XPath expressions');
|
||||
$this->assertText('adding a prefix to a parsed xml value.');
|
||||
$this->assertText('Mapping on import');
|
||||
$this->assertText('Feeds XPath Parser: HTML parser example for number expressions');
|
||||
$this->assertText("I dont want to define any field queries");
|
||||
$this->assertText("Document // and other syntax for this module a little better");
|
||||
|
||||
// Test debugging.
|
||||
$edit = array(
|
||||
'feeds[FeedsXPathParserHTML][xpath][exp][debug][xpathparser:0]' => TRUE,
|
||||
);
|
||||
$this->postAndCheck($feed_node_edit_url, $edit, t('Save'), 'Basic page Testing XPath HTML Parser has been updated.');
|
||||
$this->drupalPost('node/' . $nid . '/import', array(), t('Import'));
|
||||
$this->assertText('<a href="http://drupal.org/node/976478">Xpath Functions</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1048030">Unable to upload .html files</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1050310">Import to multiple content types</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1047788">Parser includes tags in mapped output</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1043608">Errors</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1044546">Loop through HTML - all data is in one node?</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1043728">Patch: add encoding options for PHP tidy feature</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1040132">Import and Maintain 1300+ Node Items</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1043604">Documentation update</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1039492">An HTTP error 404 occured</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1042048">Does it work with Feeds Image Grabber</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/961158">Node published date not being correctly mapped (set to 1 Jan 1970)</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1021474">fields to fill xpath not displayed in importer interface except for "body"</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1040530">parsing link field</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1038912">Error when switching to XML Parser</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1032340">Duplicate content even if "unique target" is set</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/982102">Labels/field names become meaningless with Data Processor</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/1034758">Xpath namespace help</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/908458">warning: mysql_real_escape_string()</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/869076">Feeds XPath Parser: warning: Invalid argument</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/991386">What am I missing? FeedsXPathParser: No mappings are defined.</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/990972">CDATA in tag not producing text</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/989948">Cant map empty fields</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/973324">Support literal XPath expressions</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/958344">adding a prefix to a parsed xml value.</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/914216">Mapping on import</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/863714">Feeds XPath Parser: HTML parser example for number expressions</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/915856">I dont want to define any field queries</a>');
|
||||
$this->assertText('<a href="http://drupal.org/node/950150">Document // and other syntax for this module a little better</a>');
|
||||
$this->assertText(t('There are no new nodes'));
|
||||
// Turn debugging off.
|
||||
$edit = array(
|
||||
'feeds[FeedsXPathParserHTML][xpath][exp][debug][xpathparser:0]' => FALSE,
|
||||
);
|
||||
$this->postAndCheck($feed_node_edit_url, $edit, t('Save'), 'Basic page Testing XPath HTML Parser has been updated.');
|
||||
|
||||
// Test that overriding default settings works.
|
||||
$edit = array(
|
||||
'feeds[FeedsXPathParserHTML][xpath][context]' => '/foo',
|
||||
'feeds[FeedsXPathParserHTML][xpath][sources][xpathparser:0]' => 'bar',
|
||||
'feeds[FeedsXPathParserHTML][xpath][sources][xpathparser:1]' => 'baz',
|
||||
);
|
||||
|
||||
$this->postAndCheck($feed_node_edit_url, $edit, t('Save'), 'Basic page Testing XPath HTML Parser has been updated.');
|
||||
|
||||
// Assert that we don't create an empty node when XPath values don't return anything.
|
||||
// That happened at one point.
|
||||
$this->drupalPost('node/' . $nid . '/import', array(), t('Import'));
|
||||
$this->assertText(t('There are no new nodes'));
|
||||
|
||||
// Test that validation works.
|
||||
$edit = array(
|
||||
'feeds[FeedsXPathParserHTML][xpath][context]' => 'sdf asf',
|
||||
'feeds[FeedsXPathParserHTML][xpath][sources][xpathparser:0]' => 'asdf[sadfas asdf]',
|
||||
);
|
||||
$this->drupalPost($feed_node_edit_url, $edit, 'Save');
|
||||
// Check for valid error messages.
|
||||
$this->assertText('There was an error with the XPath selector: Invalid expression');
|
||||
$this->assertText('There was an error with the XPath selector: Invalid predicate');
|
||||
// Make sure the fields are errored out correctly. I.e. we have red outlines.
|
||||
$this->assertFieldByXPath('//input[@id="edit-feeds-feedsxpathparserhtml-xpath-context"][1]/@class', 'form-text required error');
|
||||
$this->assertFieldByXPath('//input[@id="edit-feeds-feedsxpathparserhtml-xpath-sources-xpathparser0"][1]/@class', 'form-text error');
|
||||
|
||||
// Put the values back so we can test inheritance if the form was changed
|
||||
// and then changed back.
|
||||
$edit = array(
|
||||
'feeds[FeedsXPathParserHTML][xpath][context]' => '//tr[starts-with(@class, "odd ") or starts-with(@class, "even ")]',
|
||||
'feeds[FeedsXPathParserHTML][xpath][sources][xpathparser:0]' => 'td[1]/a',
|
||||
'feeds[FeedsXPathParserHTML][xpath][sources][xpathparser:1]' => 'td[1]/a/@href',
|
||||
);
|
||||
$this->postAndCheck($feed_node_edit_url, $edit, t('Save'), t('Basic page Testing XPath HTML Parser has been updated.'));
|
||||
|
||||
// Change importer defaults.
|
||||
$edit = array(
|
||||
'xpath[context]' => '//tr',
|
||||
'xpath[sources][xpathparser:0]' => 'booya',
|
||||
'xpath[sources][xpathparser:1]' => 'boyz',
|
||||
);
|
||||
$this->postAndCheck($importer_url, $edit, t('Save'), t('Your changes have been saved.'));
|
||||
|
||||
// Make sure the changes propagated.
|
||||
$this->drupalGet($feed_node_edit_url);
|
||||
$this->assertFieldByName('feeds[FeedsXPathParserHTML][xpath][context]', '//tr');
|
||||
$this->assertFieldByName('feeds[FeedsXPathParserHTML][xpath][sources][xpathparser:0]', 'booya');
|
||||
$this->assertFieldByName('feeds[FeedsXPathParserHTML][xpath][sources][xpathparser:1]', 'boyz');
|
||||
|
||||
//Cleanup
|
||||
$this->drupalPost("node/$nid/delete-items", array(), t('Delete'));
|
||||
$this->assertText(t('Deleted 29 nodes'));
|
||||
|
||||
$this->_testGetRaw($importer_url);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Imports a simple HTML document with a raw-XML source enabled.
 *
 * Reconfigures the importer so that the second XPath source ('*') is flagged
 * as rawXML, creates a feed node from simple.html, checks that exactly one
 * node was created, and finally requests the edit page of the node that
 * follows the feed node.
 *
 * @param string $importer_url
 *   Path of the importer's parser settings form.
 */
public function _testGetRaw($importer_url) {
  // Change importer defaults.
  $edit = array(
    'xpath[context]' => '/html',
    'xpath[sources][xpathparser:0]' => 'head/title',
    'xpath[sources][xpathparser:1]' => '*',
    'xpath[rawXML][xpathparser:1]' => TRUE,
  );
  $this->postAndCheck($importer_url, $edit, t('Save'), t('Your changes have been saved.'));

  $path = $GLOBALS['base_url'] . '/' . drupal_get_path('module', 'feeds_xpathparser') . '/tests/feeds_xpathparser/';
  $nid = $this->createFeedNode('xpath', $path . 'simple.html', 'Testing GetRaw');
  $this->assertText(t('Created 1 node'));

  // The arithmetic must be parenthesised: before PHP 8, "." and "+" had equal
  // precedence and were evaluated left to right, so the unparenthesised form
  // computed ('node/' . $nid) + 1 and produced the path "1/edit".
  // NOTE(review): assumes the imported node receives the id right after the
  // feed node - confirm if other nodes can be created in between.
  $url = 'node/' . ($nid + 1) . '/edit';
  $this->drupalGet($url);
}
|
||||
}
|
@@ -0,0 +1,241 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Tests for FeedsXPathParserXML.inc.
|
||||
*/
|
||||
|
||||
/**
 * Regression tests for the Feeds XPath XML parser.
 *
 * Exercises importer and per-feed-node XPath settings, debugging output,
 * update/replace of existing nodes, validation of XPath expressions and
 * variable substitution inside queries.
 */
class FeedsXPathParserXMLTestCase extends FeedsXPathParserWebTestCase {

  /**
   * Describe this test.
   */
  public static function getInfo() {
    return array(
      'name' => 'XML Parser',
      'description' => 'Regression tests for Feeds XPath XML parser.',
      'group' => 'Feeds XPath Parser',
    );
  }

  /**
   * Run tests.
   */
  public function test() {
    $this->createImporterConfiguration('XPathXML', 'xpath_xml');

    $this->setPlugin('xpath_xml', 'FeedsXPathParserXML');
    $importer_url = $this->feeds_base . '/xpath_xml/settings/FeedsXPathParserXML';
    // Check help message.
    $this->drupalGet($importer_url);
    $this->assertText('No XPath mappings are defined.');

    $this->addMappings('xpath_xml',
      array(
        array(
          'source' => 'xpathparser:0',
          'target' => 'title',
          'unique' => FALSE,
        ),
        array(
          'source' => 'xpathparser:1',
          'target' => 'guid',
          'unique' => TRUE,
        ),
        array(
          'source' => 'xpathparser:2',
          'target' => 'body',
          'unique' => FALSE,
        ),
      )
    );
    // Set importer default settings.
    $edit = array(
      'xpath[context]' => '//entry',
      'xpath[sources][xpathparser:0]' => 'title',
      'xpath[sources][xpathparser:1]' => 'id',
      'xpath[sources][xpathparser:2]' => 'id',
    );
    $this->postAndCheck($importer_url, $edit, 'Save', 'Your changes have been saved.');

    // Test import.
    $path = $GLOBALS['base_url'] . '/' . drupal_get_path('module', 'feeds_xpathparser') . '/tests/feeds_xpathparser/';
    // We use an atom feed so that we can test that default namespaces are being
    // applied appropriately.
    $nid = $this->createFeedNode('xpath_xml', $path . 'sample_atom_feed.xml', 'Testing XPath XML Parser');
    $feed_node_edit_url = 'node/' . $nid . '/edit';
    $this->assertText('Created 3 nodes');

    // Import again, this verifies url field was mapped correctly.
    $this->drupalPost('node/' . $nid . '/import', array(), 'Import');
    $this->assertText('There are no new nodes');

    // Assert accuracy of the content imported from the sample Atom feed.
    $this->drupalGet('node');
    $this->assertText('Atom-Powered Robots Run Amok');
    $this->assertText('My dog Jack is the best.');
    $this->assertText('Physics is cool.');

    // Test debugging.
    $edit = array(
      'feeds[FeedsXPathParserXML][xpath][exp][debug][xpathparser:0]' => TRUE,
    );
    $this->postAndCheck($feed_node_edit_url, $edit, 'Save', 'Basic page Testing XPath XML Parser has been updated.');
    $this->drupalPost('node/' . $nid . '/import', array(), 'Import');
    $this->assertText('<title>Atom-Powered Robots Run Amok</title>');
    $this->assertText('<title>My dog Jack is the best.</title>');
    $this->assertText('<title>Physics is cool.</title>');
    $this->assertText('There are no new nodes.');

    // Turn debugging off.
    $edit = array(
      'feeds[FeedsXPathParserXML][xpath][exp][debug][xpathparser:0]' => FALSE,
    );
    $this->postAndCheck($feed_node_edit_url, $edit, 'Save', 'Basic page Testing XPath XML Parser has been updated.');

    // Check if update existing nodes works.
    $this->setSettings('xpath_xml', 'FeedsNodeProcessor', array('update_existing' => 2));
    $edit = array(
      'feeds[FeedsHTTPFetcher][source]' => $path . 'sample_atom_feed_updated.xml',
    );
    $this->postAndCheck($feed_node_edit_url, $edit, 'Save', 'Basic page Testing XPath XML Parser has been updated.');
    $this->drupalPost('node/' . $nid . '/import', array(), 'Import');
    $this->assertText('Updated 1 node.');
    $this->drupalGet('node');
    $this->assertText('Atom-Powered Robots Run Amok');
    $this->assertText('My dog Jack is the best.');
    // The one that changed.
    $this->assertText('Physics is really cool.');
    // Make sure the old node is changed.
    $this->assertNoText('Physics is cool.');
    // Be extra sure we updated.
    // NOTE(review): node/4 is hard-coded; presumably the feed node is node 1
    // and the three imported entries are nodes 2-4 - confirm.
    $this->drupalGet('node/4');
    $this->assertText('Physics is really cool.');

    // Check if replace existing nodes works.
    $this->setSettings('xpath_xml', 'FeedsNodeProcessor', array('update_existing' => 1));
    $edit = array(
      'feeds[FeedsHTTPFetcher][source]' => $path . 'sample_atom_feed.xml',
    );
    $this->postAndCheck($feed_node_edit_url, $edit, 'Save', 'Basic page Testing XPath XML Parser has been updated.');
    $this->drupalPost('node/' . $nid . '/import', array(), 'Import');
    $this->assertText('Updated 1 node.');
    $this->drupalGet('node');
    $this->assertText('Atom-Powered Robots Run Amok');
    $this->assertText('My dog Jack is the best.');
    // The one that changed.
    $this->assertText('Physics is cool.');
    // Make sure the old node is changed.
    $this->assertNoText('Physics is really cool.');
    // Be extra sure we updated.
    $this->drupalGet('node/4');
    $this->assertText('Physics is cool.');

    // Test that overriding default settings works.
    $edit = array(
      'feeds[FeedsXPathParserXML][xpath][context]' => '/foo',
      'feeds[FeedsXPathParserXML][xpath][sources][xpathparser:0]' => 'bar',
      'feeds[FeedsXPathParserXML][xpath][sources][xpathparser:1]' => 'baz',
      'feeds[FeedsXPathParserXML][xpath][sources][xpathparser:2]' => 'wee',
    );
    $this->postAndCheck($feed_node_edit_url, $edit, 'Save', 'Basic page Testing XPath XML Parser has been updated.');

    // Assert that we don't create an empty node when XPath values don't
    // return anything. That happened at one point.
    $this->drupalPost('node/' . $nid . '/import', array(), 'Import');
    $this->assertText('There are no new nodes.');

    // Test that validation works.
    $edit = array(
      'feeds[FeedsXPathParserXML][xpath][context]' => 'sdf asf',
      'feeds[FeedsXPathParserXML][xpath][sources][xpathparser:0]' => 'asdf[sadfas asdf]',
    );
    $this->drupalPost($feed_node_edit_url, $edit, 'Save');
    // Check for valid error messages.
    $this->assertText('There was an error with the XPath selector: Invalid expression');
    $this->assertText('There was an error with the XPath selector: Invalid predicate');
    // Make sure the fields are errored out correctly. I.e. we have red outlines.
    $this->assertFieldByXPath('//input[@id="edit-feeds-feedsxpathparserxml-xpath-context"][1]/@class', 'form-text required error');
    $this->assertFieldByXPath('//input[@id="edit-feeds-feedsxpathparserxml-xpath-sources-xpathparser0"][1]/@class', 'form-text error');

    // Put the values back so we can test inheritance if the form was changed
    // and then changed back.
    $edit = array(
      'feeds[FeedsXPathParserXML][xpath][context]' => '//entry',
      'feeds[FeedsXPathParserXML][xpath][sources][xpathparser:0]' => 'title',
      'feeds[FeedsXPathParserXML][xpath][sources][xpathparser:1]' => 'id',
      'feeds[FeedsXPathParserXML][xpath][sources][xpathparser:2]' => 'id',
    );
    $this->postAndCheck($feed_node_edit_url, $edit, 'Save', 'Basic page Testing XPath XML Parser has been updated.');

    // Change importer defaults.
    $edit = array(
      'xpath[context]' => '//tr',
      'xpath[sources][xpathparser:0]' => 'booya',
      'xpath[sources][xpathparser:1]' => 'boyz',
      'xpath[sources][xpathparser:2]' => 'woot',
    );
    $this->postAndCheck($importer_url, $edit, 'Save', 'Your changes have been saved.');

    // Make sure the changes propagated.
    $this->drupalGet($feed_node_edit_url);
    $this->assertFieldByName('feeds[FeedsXPathParserXML][xpath][context]', '//tr');
    $this->assertFieldByName('feeds[FeedsXPathParserXML][xpath][sources][xpathparser:0]', 'booya');
    $this->assertFieldByName('feeds[FeedsXPathParserXML][xpath][sources][xpathparser:1]', 'boyz');
    $this->assertFieldByName('feeds[FeedsXPathParserXML][xpath][sources][xpathparser:2]', 'woot');
    // Check that our message comes out correct.
    $this->assertText('Field guid is mandatory and considered unique: only one item per guid value will be created.');

    // Check that allow_override works as expected.
    $this->setSettings('xpath_xml', 'FeedsXPathParserXML', array('xpath[allow_override]' => FALSE));
    $this->drupalGet($feed_node_edit_url);
    $this->assertNoText('XPath Parser Settings');
    $this->assertNoField('xpath[context]');
    // On the feed node form the context field is namespaced under the parser
    // (see the assertFieldByName() calls above), so the bare 'xpath[context]'
    // check can never fail here. Also assert on the name the form really uses.
    $this->assertNoField('feeds[FeedsXPathParserXML][xpath][context]');
  }

  /**
   * Test variable substitution.
   *
   * The third XPath source embeds a $title variable in its query.
   */
  public function testVariables() {
    $this->createImporterConfiguration();

    $this->setPlugin('syndication', 'FeedsXPathParserXML');
    $importer_url = $this->feeds_base . '/syndication/settings/FeedsXPathParserXML';
    $this->addMappings('syndication',
      array(
        array(
          'source' => 'xpathparser:0',
          'target' => 'title',
          'unique' => FALSE,
        ),
        array(
          'source' => 'xpathparser:1',
          'target' => 'guid',
          'unique' => TRUE,
        ),
        array(
          'source' => 'xpathparser:2',
          'target' => 'body',
          'unique' => FALSE,
        ),
      )
    );
    // Set importer default settings.
    $edit = array(
      'xpath[context]' => '//entry',
      'xpath[sources][xpathparser:0]' => 'title',
      'xpath[sources][xpathparser:1]' => 'id',
      'xpath[sources][xpathparser:2]' => 'link/@$title',
    );
    $this->postAndCheck($importer_url, $edit, 'Save', 'Your changes have been saved.');

    // Test import using the fixture for rewritten (variable) queries.
    $path = $GLOBALS['base_url'] . '/' . drupal_get_path('module', 'feeds_xpathparser') . '/tests/feeds_xpathparser/';
    $this->createFeedNode('syndication', $path . 'rewrite_test.xml', 'Testing XPath XML Parser');
    $this->assertText('Created 3 nodes');
    $this->drupalGet('node');
  }

}
|
@@ -0,0 +1,81 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Test cases for the xpath query parser.
|
||||
*/
|
||||
/**
 * Unit tests for FeedsXPathParserQueryParser.
 *
 * Each case feeds an XPath query to the parser and checks the query returned
 * by getQuery().
 */
class FeedsXPathParserQueryParserTestCase extends DrupalUnitTestCase {

  /**
   * Describe this test.
   *
   * The strings are plain literals rather than t() calls, matching the other
   * test classes of this module; getInfo() is read during test discovery.
   */
  public static function getInfo() {
    return array(
      'name' => 'Query Parser',
      'description' => 'Unit tests for the query parser inside Feeds XPath Parser.',
      'group' => 'Feeds XPath Parser',
    );
  }

  /**
   * Loads the query parser class before each test.
   */
  public function setUp() {
    parent::setUp();
    module_load_include('inc', 'feeds_xpathparser', 'FeedsXPathParserQueryParser');
  }

  /**
   * Checks the parser output for a list of XPath queries.
   *
   * The table maps each input query to the query expected from getQuery().
   * Cases are evaluated in the order listed.
   */
  public function testSimple() {
    $cases = array(
      'cow' => '__default__:cow',
      '/cow' => '/__default__:cow',
      '/cow/barn' => '/__default__:cow/__default__:barn',
      '/cow/barn[@id = "asdfsaf"]' => '/__default__:cow/__default__:barn[@id = "asdfsaf"]',
      '/cow/barn[@id=chair]' => '/__default__:cow/__default__:barn[@id=__default__:chair]',
      '/cow:asdf' => '/cow:asdf',
      '@cow' => '@cow',
      'starts-with(@id, "cat")' => 'starts-with(@id, "cat")',
      'starts-with(cat/dog/fire:breather, "cat")' => 'starts-with(__default__:cat/__default__:dog/fire:breather, "cat")',
      '//state[@id = ../city[name="CityName"]/state_id]/name' => '//__default__:state[@id = ../__default__:city[__default__:name="CityName"]/__default__:state_id]/__default__:name',
      'attribute::lang' => 'attribute::lang',
      'child::book' => 'child::__default__:book',
      'child::*' => 'child::*',
      'child::text()' => 'child::text()',
      'ancestor-or-self::book' => 'ancestor-or-self::__default__:book',
      'child::*/child::price' => 'child::*/child::__default__:price',
      "/asdfasfd[@id = 'a' or @id='b']" => "/__default__:asdfasfd[@id = 'a' or @id='b']",
      // Go! difficult xpath queries from stack overflow.
      "id('yui-gen2')/x:div[3]/x:div/x:a[1]" => "id('yui-gen2')/x:div[3]/x:div/x:a[1]",
      "/descendant::a[@class='buttonCheckout']" => "/descendant::__default__:a[@class='buttonCheckout']",
      "//a[@href='javascript:void(0)']" => "//__default__:a[@href='javascript:void(0)']",
      '//*/@attribute' => '//*/@attribute',
      '/descendant::*[attribute::attribute]' => '/descendant::*[attribute::attribute]',
      '//Event[not(System/Level = preceding::Level) or not(System/Task = preceding::Task)]' => '//__default__:Event[not(__default__:System/__default__:Level = preceding::__default__:Level) or not(__default__:System/__default__:Task = preceding::__default__:Task)]',
      "section[@type='cover']/line/@page" => "__default__:section[@type='cover']/__default__:line/@page",
      '/articles/article/*[name()="title" or name()="short"]' => '/__default__:articles/__default__:article/*[name()="title" or name()="short"]',
      "/*/article[@id='2']/*[self::title or self::short]" => "/*/__default__:article[@id='2']/*[self::__default__:title or self::__default__:short]",
      'not(/asdfasfd/asdfasf//asdfasdf) | /asdfasf/sadfasf/@asdf' => 'not(/__default__:asdfasfd/__default__:asdfasf//__default__:asdfasdf) | /__default__:asdfasf/__default__:sadfasf/@asdf',
    );

    foreach ($cases as $query => $expected) {
      $parser = new FeedsXPathParserQueryParser($query);
      $this->assertEqual($parser->getQuery(), $expected);
    }
  }

}
|
Reference in New Issue
Block a user