first import

This commit is contained in:
Bachir Soussi Chiadmi
2015-04-08 11:40:19 +02:00
commit 1bc61b12ad
8435 changed files with 1582817 additions and 0 deletions

View File

@@ -0,0 +1,51 @@
<?php
/**
* @file
* Provides the FeedsXPathParserHTML class.
*/
class FeedsXPathParserHTML extends FeedsXPathParserBase {

  /**
   * Implements FeedsXPathParserBase::setup().
   *
   * Builds a DOMDocument from the fetched HTML, optionally repairing the raw
   * markup with HTML Tidy first.
   *
   * @param array $source_config
   *   The source configuration. The keys read here all live under 'exp':
   *   - tidy: when non-empty (and the tidy extension is available) the markup
   *     is run through tidy_repair_string() before parsing.
   *   - tidy_encoding: encoding name handed to tidy; 'utf8' is used when the
   *     key is missing or empty.
   *   - errors: passed through to errorStop(); FALSE is passed when missing.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result whose getRaw() output is parsed.
   *
   * @return DOMDocument
   *   The parsed document.
   *
   * @throws Exception
   *   If there is no markup to parse or DOMDocument::loadHTML() reports
   *   failure.
   */
  protected function setup($source_config, FeedsFetcherResult $fetcher_result) {
    $raw = $fetcher_result->getRaw();

    // Only attempt the tidy pass when the extension is actually loaded;
    // otherwise calling tidy_repair_string() would be a fatal error. In that
    // case the untouched markup is parsed instead.
    if (!empty($source_config['exp']['tidy']) && function_exists('tidy_repair_string')) {
      $config = array(
        'merge-divs' => FALSE,
        'merge-spans' => FALSE,
        'join-styles' => FALSE,
        'drop-empty-paras' => FALSE,
        'wrap' => 0,
        'tidy-mark' => FALSE,
        'escape-cdata' => TRUE,
        'word-2000' => TRUE,
      );
      // Default tidy encoding is UTF8.
      $encoding = !empty($source_config['exp']['tidy_encoding']) ? $source_config['exp']['tidy_encoding'] : 'utf8';
      $raw = tidy_repair_string(trim($raw), $config, $encoding);
    }

    // tidy_repair_string() returns FALSE on failure and the fetcher may hand
    // back nothing at all. DOMDocument::loadHTML() warns on an empty source in
    // older PHP versions and throws a ValueError in PHP 8, so bail out with the
    // regular parse exception before touching the libxml error handling.
    if (!is_string($raw) || $raw === '') {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }

    $doc = new DOMDocument();
    // Use our own error handling.
    // NOTE(review): errorStart()/errorStop() are defined outside this file;
    // presumably they toggle and restore libxml error collection - confirm.
    $use = $this->errorStart();
    $success = $doc->loadHTML($raw);
    // The raw markup can be large and is no longer needed once parsed.
    unset($raw);
    $this->errorStop($use, isset($source_config['exp']['errors']) ? $source_config['exp']['errors'] : FALSE);

    if (!$success) {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }
    return $doc;
  }

  /**
   * Serializes a single node back to markup.
   *
   * NOTE(review): relies on $this->doc, which is not assigned anywhere in this
   * class; presumably the base class stores the document returned by setup()
   * there - confirm.
   *
   * @param DOMNode $node
   *   The node to serialize.
   *
   * @return string
   *   The node as HTML when the running PHP version allows passing a node to
   *   DOMDocument::saveHTML(), otherwise the result of
   *   DOMDocument::saveXML().
   */
  protected function getRaw(DOMNode $node) {
    // DOMDocument::saveHTML() cannot take $node as an argument prior to 5.3.6.
    if (version_compare(phpversion(), '5.3.6', '>=')) {
      return $this->doc->saveHTML($node);
    }
    return $this->doc->saveXML($node);
  }

}