Client.php 15 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712
  1. <?php
  2. namespace PicoFeed\Client;
  3. use DateTime;
  4. use Exception;
  5. use LogicException;
  6. use PicoFeed\Logging\Logger;
  7. use PicoFeed\Config\Config;
  /**
   * Base HTTP client.
   *
   * Holds the request configuration (proxy, credentials, timeouts, cache
   * validators) and the state extracted from the last response (status code,
   * body, content type, charset, expiration). The transport itself is
   * delegated to subclasses through doRequest().
   *
   * @author Frederic Guillot
   */
  abstract class Client
  {
      /**
       * Flag that says whether the resource has been modified.
       *
       * @var bool
       */
      private $is_modified = true;

      /**
       * HTTP Content-Type (stored lower-cased by handleNormalResponse()).
       *
       * @var string
       */
      private $content_type = '';

      /**
       * Charset extracted from the Content-Type header.
       *
       * @var string
       */
      private $encoding = '';

      /**
       * HTTP request headers.
       *
       * @var array
       */
      protected $request_headers = array();

      /**
       * HTTP ETag header.
       *
       * @var string
       */
      protected $etag = '';

      /**
       * HTTP Last-Modified header.
       *
       * @var string
       */
      protected $last_modified = '';

      /**
       * Expiration date; null until execute() has completed once.
       *
       * @var DateTime|null
       */
      protected $expiration = null;

      /**
       * Proxy hostname.
       *
       * @var string
       */
      protected $proxy_hostname = '';

      /**
       * Proxy port.
       *
       * @var int
       */
      protected $proxy_port = 3128;

      /**
       * Proxy username.
       *
       * @var string
       */
      protected $proxy_username = '';

      /**
       * Proxy password.
       *
       * @var string
       */
      protected $proxy_password = '';

      /**
       * Basic auth username.
       *
       * @var string
       */
      protected $username = '';

      /**
       * Basic auth password.
       *
       * @var string
       */
      protected $password = '';

      /**
       * Additional cURL options.
       *
       * @var array
       */
      protected $additional_curl_options = array();

      /**
       * Client connection timeout.
       *
       * @var int
       */
      protected $timeout = 10;

      /**
       * User-agent.
       *
       * @var string
       */
      protected $user_agent = 'PicoFeed (https://github.com/miniflux/picoFeed)';

      /**
       * Real URL used (can change after an HTTP redirect).
       *
       * @var string
       */
      protected $url = '';

      /**
       * Page/Feed content.
       *
       * @var string
       */
      protected $content = '';

      /**
       * Maximum number of HTTP redirections, to avoid infinite loops.
       *
       * @var int
       */
      protected $max_redirects = 5;

      /**
       * Maximum size of the HTTP response body.
       *
       * @var int
       */
      protected $max_body_size = 2097152; // 2MB

      /**
       * HTTP response status code.
       *
       * @var int
       */
      protected $status_code = 0;

      /**
       * Enables direct passthrough to the requesting client.
       *
       * @var bool
       */
      protected $passthrough = false;

      /**
       * Do the HTTP request.
       *
       * The returned array is read by execute() through its 'status',
       * 'headers' and 'body' keys.
       *
       * @abstract
       *
       * @return array
       */
      abstract public function doRequest();

      /**
       * Get a client instance: the curl driver when the curl extension is
       * loaded, otherwise the stream driver when "allow_url_fopen" is enabled.
       *
       * @static
       *
       * @throws LogicException When neither transport is available
       *
       * @return \PicoFeed\Client\Client
       */
      public static function getInstance()
      {
          if (function_exists('curl_init')) {
              return new Curl();
          }

          if (ini_get('allow_url_fopen')) {
              return new Stream();
          }

          throw new LogicException('You must have "allow_url_fopen=1" or curl extension installed');
      }

      /**
       * Set the HTTP headers of the request (replaces any previous list).
       *
       * @param array $headers
       */
      public function setHeaders($headers)
      {
          $this->request_headers = $headers;
      }

      /**
       * Perform the HTTP request and update the client state from the response.
       *
       * @param string $url URL (when empty, the URL already set on the client is used)
       *
       * @throws UnauthorizedException On HTTP 401
       * @throws ForbiddenException    On HTTP 403
       * @throws InvalidUrlException   On HTTP 404
       *
       * @return Client
       */
      public function execute($url = '')
      {
          if ($url !== '') {
              $this->url = $url;
          }

          $driver = get_called_class();

          Logger::setMessage($driver.' Fetch URL: '.$this->url);
          Logger::setMessage($driver.' Etag provided: '.$this->etag);
          Logger::setMessage($driver.' Last-Modified provided: '.$this->last_modified);

          $response = $this->doRequest();

          $this->status_code = $response['status'];
          $this->handleNotModifiedResponse($response);
          $this->handleErrorResponse($response);
          $this->handleNormalResponse($response);

          $this->expiration = $this->parseExpiration($response['headers']);
          Logger::setMessage($driver.' Expiration: '.$this->expiration->format(DATE_ISO8601));

          return $this;
      }

      /**
       * Handle not modified response.
       *
       * A 304 marks the resource as unchanged. A 200 is compared against the
       * validators previously set on the client, which are then replaced by
       * the ones of the response. Any other status leaves the flag untouched.
       *
       * @param array $response Client response
       */
      protected function handleNotModifiedResponse(array $response)
      {
          // Loose comparison on purpose: the status is used as returned by the driver.
          if ($response['status'] == 304) {
              $this->is_modified = false;
          } elseif ($response['status'] == 200) {
              $this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified);
              $this->etag = $this->getHeader($response, 'ETag');
              $this->last_modified = $this->getHeader($response, 'Last-Modified');
          }

          if ($this->is_modified === false) {
              Logger::setMessage(get_called_class().' Resource not modified');
          }
      }

      /**
       * Handle HTTP error codes.
       *
       * @param array $response Client response
       * @throws ForbiddenException
       * @throws InvalidUrlException
       * @throws UnauthorizedException
       */
      protected function handleErrorResponse(array $response)
      {
          // switch compares loosely, like the status checks elsewhere in this class.
          switch ($response['status']) {
              case 401:
                  throw new UnauthorizedException('Wrong or missing credentials');
              case 403:
                  throw new ForbiddenException('Not allowed to access resource');
              case 404:
                  throw new InvalidUrlException('Resource not found');
          }
      }

      /**
       * Handle normal response: store body, content type and charset on HTTP 200.
       *
       * @param array $response Client response
       */
      protected function handleNormalResponse(array $response)
      {
          if ($response['status'] == 200) {
              $this->content = $response['body'];
              $this->content_type = $this->findContentType($response);
              $this->encoding = $this->findCharset();
          }
      }

      /**
       * Check if a resource has been modified according to its cache validators.
       *
       * @param array  $response
       * @param string $etag         ETag value known before the request
       * @param string $lastModified Last-Modified value known before the request
       *
       * @return bool
       */
      private function hasBeenModified($response, $etag, $lastModified)
      {
          // Same header names as the ones read back in handleNotModifiedResponse().
          $validators = array(
              'ETag' => $etag,
              'Last-Modified' => $lastModified,
          );

          // Compare the values for each header that is present
          $matching = 0;

          foreach ($validators as $name => $known) {
              if (! isset($response['headers'][$name])) {
                  continue;
              }

              if ($response['headers'][$name] !== $known) {
                  return true;
              }

              ++$matching;
          }

          // Without any validator in the response nothing can be proven, so the
          // resource is considered modified; otherwise every present validator
          // matched and the resource is unchanged.
          return $matching === 0;
      }

      /**
       * Find content type from response headers.
       *
       * @param array $response Client response
       * @return string Lower-cased Content-Type value, or an empty string
       */
      public function findContentType(array $response)
      {
          return strtolower($this->getHeader($response, 'Content-Type'));
      }

      /**
       * Find charset from the stored Content-Type value.
       *
       * Only the charset token is returned: surrounding quotes and any
       * parameter following it (e.g. "; boundary=...") are left out.
       *
       * @return string Charset, or an empty string when none is declared
       */
      public function findCharset()
      {
          if (preg_match('/charset\s*=\s*["\']?([^"\';,\s]+)/', $this->content_type, $matches)) {
              return $matches[1];
          }

          return '';
      }

      /**
       * Get header value from a client response.
       *
       * @param array  $response Client response
       * @param string $header   Header name
       * @return string Header value, or an empty string when the header is missing
       */
      public function getHeader(array $response, $header)
      {
          return isset($response['headers'][$header]) ? $response['headers'][$header] : '';
      }

      /**
       * Set the Last-Modified HTTP header.
       *
       * @param string $last_modified Header value
       * @return $this
       */
      public function setLastModified($last_modified)
      {
          $this->last_modified = $last_modified;

          return $this;
      }

      /**
       * Get the value of the Last-Modified HTTP header.
       *
       * @return string
       */
      public function getLastModified()
      {
          return $this->last_modified;
      }

      /**
       * Set the value of the ETag HTTP header.
       *
       * @param string $etag ETag HTTP header value
       * @return $this
       */
      public function setEtag($etag)
      {
          $this->etag = $etag;

          return $this;
      }

      /**
       * Get the ETag HTTP header value.
       *
       * @return string
       */
      public function getEtag()
      {
          return $this->etag;
      }

      /**
       * Get the final URL value.
       *
       * @return string
       */
      public function getUrl()
      {
          return $this->url;
      }

      /**
       * Set the URL.
       *
       * @param string $url
       * @return $this
       */
      public function setUrl($url)
      {
          $this->url = $url;

          return $this;
      }

      /**
       * Get the HTTP response status code.
       *
       * @return int
       */
      public function getStatusCode()
      {
          return $this->status_code;
      }

      /**
       * Get the body of the HTTP response.
       *
       * @return string
       */
      public function getContent()
      {
          return $this->content;
      }

      /**
       * Get the content type value from HTTP headers.
       *
       * @return string
       */
      public function getContentType()
      {
          return $this->content_type;
      }

      /**
       * Get the encoding value from HTTP headers.
       *
       * @return string
       */
      public function getEncoding()
      {
          return $this->encoding;
      }

      /**
       * Return true if the remote resource has changed.
       *
       * @return bool
       */
      public function isModified()
      {
          return $this->is_modified;
      }

      /**
       * Return true if passthrough mode is enabled.
       *
       * @return bool
       */
      public function isPassthroughEnabled()
      {
          return $this->passthrough;
      }

      /**
       * Set connection timeout.
       *
       * A falsy value (0, null, '') is ignored and the current value is kept.
       *
       * @param int $timeout Connection timeout
       * @return $this
       */
      public function setTimeout($timeout)
      {
          $this->timeout = $timeout ?: $this->timeout;

          return $this;
      }

      /**
       * Set a custom user agent.
       *
       * A falsy value is ignored and the current value is kept.
       *
       * @param string $user_agent User Agent
       * @return $this
       */
      public function setUserAgent($user_agent)
      {
          $this->user_agent = $user_agent ?: $this->user_agent;

          return $this;
      }

      /**
       * Set the maximum number of HTTP redirections.
       *
       * A falsy value is ignored and the current value is kept.
       *
       * @param int $max Maximum
       * @return $this
       */
      public function setMaxRedirections($max)
      {
          $this->max_redirects = $max ?: $this->max_redirects;

          return $this;
      }

      /**
       * Set the maximum size of the HTTP body.
       *
       * A falsy value is ignored and the current value is kept.
       *
       * @param int $max Maximum
       * @return $this
       */
      public function setMaxBodySize($max)
      {
          $this->max_body_size = $max ?: $this->max_body_size;

          return $this;
      }

      /**
       * Set the proxy hostname.
       *
       * A falsy value is ignored and the current value is kept.
       *
       * @param string $hostname Proxy hostname
       * @return $this
       */
      public function setProxyHostname($hostname)
      {
          $this->proxy_hostname = $hostname ?: $this->proxy_hostname;

          return $this;
      }

      /**
       * Set the proxy port.
       *
       * A falsy value is ignored and the current value is kept.
       *
       * @param int $port Proxy port
       * @return $this
       */
      public function setProxyPort($port)
      {
          $this->proxy_port = $port ?: $this->proxy_port;

          return $this;
      }

      /**
       * Set the proxy username.
       *
       * A falsy value is ignored and the current value is kept.
       *
       * @param string $username Proxy username
       * @return $this
       */
      public function setProxyUsername($username)
      {
          $this->proxy_username = $username ?: $this->proxy_username;

          return $this;
      }

      /**
       * Set the proxy password.
       *
       * A falsy value is ignored and the current value is kept.
       *
       * @param string $password Password
       * @return $this
       */
      public function setProxyPassword($password)
      {
          $this->proxy_password = $password ?: $this->proxy_password;

          return $this;
      }

      /**
       * Set the username.
       *
       * A falsy value is ignored and the current value is kept.
       *
       * @param string $username Basic Auth username
       *
       * @return $this
       */
      public function setUsername($username)
      {
          $this->username = $username ?: $this->username;

          return $this;
      }

      /**
       * Set the password.
       *
       * A falsy value is ignored and the current value is kept.
       *
       * @param string $password Basic Auth Password
       *
       * @return $this
       */
      public function setPassword($password)
      {
          $this->password = $password ?: $this->password;

          return $this;
      }

      /**
       * Set the cURL options.
       *
       * An empty array is ignored and the current options are kept.
       *
       * @param array $options
       * @return $this
       */
      public function setAdditionalCurlOptions(array $options)
      {
          $this->additional_curl_options = $options ?: $this->additional_curl_options;

          return $this;
      }

      /**
       * Enable the passthrough mode.
       *
       * @return $this
       */
      public function enablePassthroughMode()
      {
          $this->passthrough = true;

          return $this;
      }

      /**
       * Disable the passthrough mode.
       *
       * @return $this
       */
      public function disablePassthroughMode()
      {
          $this->passthrough = false;

          return $this;
      }

      /**
       * Copy the client related settings from a config object.
       *
       * Each value goes through the matching setter, so falsy config values
       * leave the current client value untouched.
       *
       * @param \PicoFeed\Config\Config $config Config instance
       * @return $this
       */
      public function setConfig(Config $config)
      {
          // No null check needed: the type hint already rejects null.
          $this->setTimeout($config->getClientTimeout());
          $this->setUserAgent($config->getClientUserAgent());
          $this->setMaxRedirections($config->getMaxRedirections());
          $this->setMaxBodySize($config->getMaxBodySize());
          $this->setProxyHostname($config->getProxyHostname());
          $this->setProxyPort($config->getProxyPort());
          $this->setProxyUsername($config->getProxyUsername());
          $this->setProxyPassword($config->getProxyPassword());
          $this->setAdditionalCurlOptions($config->getAdditionalCurlOptions() ?: array());

          return $this;
      }

      /**
       * Return true if the HTTP status code is a redirection
       * (301, 302, 303, 307 or 308).
       *
       * @param int $code
       * @return bool
       */
      public function isRedirection($code)
      {
          return $code == 301
              || $code == 302
              || $code == 303
              || $code == 307
              || $code == 308;
      }

      /**
       * Compute the expiration date of a response from its caching headers.
       *
       * "Cache-Control: s-maxage" is checked first, then "Cache-Control:
       * max-age", then "Expires". When none is usable, or when the date cannot
       * be built, the current date is returned.
       *
       * @param HttpHeaders $headers Response headers
       * @return DateTime
       */
      public function parseExpiration(HttpHeaders $headers)
      {
          try {
              if (isset($headers['Cache-Control'])) {
                  $cacheControl = $headers['Cache-Control'];

                  // Directive names are case-insensitive, hence the "i" modifier.
                  foreach (array('/s-maxage=(\d+)/i', '/max-age=(\d+)/i') as $pattern) {
                      if (preg_match($pattern, $cacheControl, $matches)) {
                          return new DateTime('+' . $matches[1] . ' seconds');
                      }
                  }
              }

              if (! empty($headers['Expires'])) {
                  return new DateTime($headers['Expires']);
              }
          } catch (Exception $e) {
              // DateTime throws on unparsable input; fall back to "now" below.
              Logger::setMessage('Unable to parse expiration date: '.$e->getMessage());
          }

          return new DateTime();
      }

      /**
       * Get the expiration date computed from the "Expires" or "Cache-Control"
       * headers of the last response; the current date when no request has
       * been executed yet.
       *
       * @return DateTime
       */
      public function getExpiration()
      {
          return $this->expiration ?: new DateTime();
      }
  }