RuleLoader.php 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. <?php
  2. namespace PicoFeed\Scraper;
  3. use PicoFeed\Base;
  4. use PicoFeed\Logging\Logger;
  5. /**
  6. * RuleLoader class.
  7. *
  8. * @author Frederic Guillot
  9. * @author Bernhard Posselt
  10. */
  11. class RuleLoader extends Base
  12. {
  13. /**
  14. * Get the rules for an URL.
  15. *
  16. * @param string $url the URL that should be looked up
  17. * @return array the array containing the rules
  18. */
  19. public function getRules($url)
  20. {
  21. $hostname = parse_url($url, PHP_URL_HOST);
  22. if ($hostname !== false) {
  23. $files = $this->getRulesFileList($hostname);
  24. foreach ($this->getRulesFolders() as $folder) {
  25. $rule = $this->loadRuleFile($folder, $files);
  26. if (!empty($rule)) {
  27. return $rule;
  28. }
  29. }
  30. }
  31. return array();
  32. }
  33. /**
  34. * Get the list of possible rules file names for a given hostname.
  35. *
  36. * @param string $hostname Hostname
  37. * @return array
  38. */
  39. public function getRulesFileList($hostname)
  40. {
  41. $files = array($hostname); // subdomain.domain.tld
  42. $parts = explode('.', $hostname);
  43. $len = count($parts);
  44. if ($len > 2) {
  45. $subdomain = array_shift($parts);
  46. $files[] = implode('.', $parts); // domain.tld
  47. $files[] = '.'.implode('.', $parts); // .domain.tld
  48. $files[] = $subdomain; // subdomain
  49. } elseif ($len === 2) {
  50. $files[] = '.'.implode('.', $parts); // .domain.tld
  51. $files[] = $parts[0]; // domain
  52. }
  53. return $files;
  54. }
  55. /**
  56. * Load a rule file from the defined folder.
  57. *
  58. * @param string $folder Rule directory
  59. * @param array $files List of possible file names
  60. * @return array
  61. */
  62. public function loadRuleFile($folder, array $files)
  63. {
  64. foreach ($files as $file) {
  65. $filename = $folder.'/'.$file.'.php';
  66. if (file_exists($filename)) {
  67. Logger::setMessage(get_called_class().' Load rule: '.$file);
  68. return include $filename;
  69. }
  70. }
  71. return array();
  72. }
  73. /**
  74. * Get the list of folders that contains rules.
  75. *
  76. * @return array
  77. */
  78. public function getRulesFolders()
  79. {
  80. $folders = array();
  81. if ($this->config !== null && $this->config->getGrabberRulesFolder() !== null) {
  82. $folders[] = $this->config->getGrabberRulesFolder();
  83. }
  84. $folders[] = __DIR__ . '/../Rules';
  85. return $folders;
  86. }
  87. }