Excerpts.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. <?php
  2. /**
  3. * @package Grav\Common\Helpers
  4. *
  5. * @copyright Copyright (C) 2015 - 2019 Trilby Media, LLC. All rights reserved.
  6. * @license MIT License; see LICENSE file for details.
  7. */
  8. namespace Grav\Common\Helpers;
  9. use Grav\Common\Grav;
  10. use Grav\Common\Page\Interfaces\PageInterface;
  11. use Grav\Common\Uri;
  12. use Grav\Common\Page\Medium\Medium;
  13. use Grav\Common\Utils;
  14. use RocketTheme\Toolbox\Event\Event;
  15. use RocketTheme\Toolbox\ResourceLocator\UniformResourceLocator;
  /**
   * Static helpers that convert HTML tags to and from "excerpt" arrays and
   * rewrite the link / image URLs they carry.
   *
   * An excerpt is a nested array of the form
   * `['element' => ['name' => string, 'attributes' => array, 'text' => string (optional)]]`.
   */
  class Excerpts
  {
      /**
       * Process Grav image media URL from HTML tag
       *
       * The `src` is first run through the link processing (as a temporary `href`),
       * then through the image processing; the untouched source is kept in `data-src`.
       *
       * @param string $html HTML tag e.g. `<img src="image.jpg" />`
       * @param PageInterface $page The current page object
       * @return string Returns final HTML string; the input is returned unchanged
       *                when it contains no `<img>` tag with a `src` attribute
       */
      public static function processImageHtml($html, PageInterface $page)
      {
          $excerpt = static::getExcerptFromHtml($html, 'img');

          // Nothing to process: no <img> was found, or it carries no src attribute.
          if (!isset($excerpt['element']['attributes']['src'])) {
              return $html;
          }

          $original_src = $excerpt['element']['attributes']['src'];

          // processLinkExcerpt() reads and writes `href`, so lend it the image source
          // under that key and move the result back to `src` afterwards.
          $excerpt['element']['attributes']['href'] = $original_src;
          $excerpt = static::processLinkExcerpt($excerpt, $page, 'image');
          $excerpt['element']['attributes']['src'] = $excerpt['element']['attributes']['href'];
          unset($excerpt['element']['attributes']['href']);

          $excerpt = static::processImageExcerpt($excerpt, $page);
          $excerpt['element']['attributes']['data-src'] = $original_src;

          return static::getHtmlFromExcerpt($excerpt);
      }

      /**
       * Get an Excerpt array from a chunk of HTML
       *
       * When several matching tags are present, the excerpt describes the LAST one.
       *
       * @param string $html Chunk of HTML (treated as UTF-8)
       * @param string $tag A tag, for example `img`
       * @return array|null returns nested array excerpt, or null when the input is
       *                    empty, cannot be parsed, or holds no matching tag
       */
      public static function getExcerptFromHtml($html, $tag)
      {
          $html = (string) $html;

          // loadHTML() warns (PHP 7) or throws ValueError (PHP 8) on empty input.
          if (trim($html) === '') {
              return null;
          }

          $doc = new \DOMDocument();

          // Collect libxml parse errors internally instead of emitting PHP warnings
          // for HTML5 or slightly malformed markup; restore the caller's setting after.
          $previous = libxml_use_internal_errors(true);
          // Without an encoding hint libxml decodes the bytes as ISO-8859-1 and
          // mangles non-ASCII attribute values; the XML declaration forces UTF-8.
          $loaded = $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
          libxml_clear_errors();
          libxml_use_internal_errors($previous);

          if ($loaded === false) {
              return null;
          }

          $excerpt = null;

          foreach ($doc->getElementsByTagName($tag) as $element) {
              $attributes = [];
              foreach ($element->attributes as $name => $attribute) {
                  $attributes[$name] = $attribute->value;
              }

              // Each match overwrites the previous one, so the last tag wins.
              $excerpt = [
                  'element' => [
                      'name' => $element->tagName,
                      'attributes' => $attributes,
                  ],
              ];
          }

          return $excerpt;
      }

      /**
       * Rebuild HTML tag from an excerpt array
       *
       * Attributes whose value is null are omitted. Attribute values are HTML-escaped
       * (existing entities are left as they are) so that a quote inside a value cannot
       * terminate the attribute. The element is self-closed unless `text` is set;
       * `text` is emitted verbatim.
       *
       * @param array $excerpt
       * @return string
       */
      public static function getHtmlFromExcerpt($excerpt)
      {
          $element = $excerpt['element'];
          $html = '<' . $element['name'];

          if (isset($element['attributes'])) {
              foreach ($element['attributes'] as $name => $value) {
                  if ($value === null) {
                      continue;
                  }

                  // double_encode is off so values that already contain entities
                  // (e.g. `&amp;`) are not encoded a second time.
                  $escaped = htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
                  $html .= ' ' . $name . '="' . $escaped . '"';
              }
          }

          if (isset($element['text'])) {
              $html .= '>' . $element['text'] . '</' . $element['name'] . '>';
          } else {
              $html .= ' />';
          }

          return $html;
      }

      /**
       * Process a Link excerpt
       *
       * Decodes the `href` attribute, moves supported query parameters
       * (`rel`, `target`, `id`, `class`, `classes`) onto the element as attributes
       * unless `noprocess` is present, and writes the rebuilt URL back to `href`.
       *
       * @param array $excerpt
       * @param PageInterface $page
       * @param string $type
       * @return mixed The excerpt with its `href` attribute rewritten; returned
       *               unchanged when the URL cannot be parsed
       */
      public static function processLinkExcerpt($excerpt, PageInterface $page, $type = 'link')
      {
          $href = $excerpt['element']['attributes']['href'] ?? '';
          $url = htmlspecialchars_decode(rawurldecode((string) $href));
          $url_parts = static::parseUrl($url);

          // parseUrl() is documented as array|bool; leave the excerpt alone on failure.
          if (!is_array($url_parts)) {
              return $excerpt;
          }

          // If there is a query, then parse it and build action calls.
          if (isset($url_parts['query'])) {
              $actions = array_reduce(explode('&', $url_parts['query']), function ($carry, $item) {
                  $parts = explode('=', $item, 2);
                  // A parameter without `=` is stored as boolean true.
                  $value = isset($parts[1]) ? rawurldecode($parts[1]) : true;
                  $carry[$parts[0]] = $value;

                  return $carry;
              }, []);

              // Valid attributes supported.
              $valid_attributes = ['rel', 'target', 'id', 'class', 'classes'];

              // Unless told to not process, go through actions.
              if (array_key_exists('noprocess', $actions)) {
                  unset($actions['noprocess']);
              } else {
                  // Move every supported parameter from the query onto the element.
                  foreach ($actions as $attrib => $value) {
                      $key = $attrib;

                      if (in_array($attrib, $valid_attributes, true)) {
                          // support both class and classes.
                          if ($attrib === 'classes') {
                              $attrib = 'class';
                          }

                          // Comma-separated values become space-separated.
                          $excerpt['element']['attributes'][$attrib] = str_replace(',', ' ', $value);
                          unset($actions[$key]);
                      }
                  }
              }

              // An empty-string prefix (not null) avoids the PHP 8.1 deprecation for
              // passing null to a non-nullable internal parameter.
              $url_parts['query'] = http_build_query($actions, '', '&', PHP_QUERY_RFC3986);
          }

          // If no query elements left, unset query.
          if (empty($url_parts['query'])) {
              unset($url_parts['query']);
          }

          // Default to an empty path if none is set.
          if (empty($url_parts['path'])) {
              $url_parts['path'] = '';
          }

          // If scheme isn't http(s)..
          if (!empty($url_parts['scheme']) && !in_array($url_parts['scheme'], ['http', 'https'], true)) {
              // Handle custom streams.
              if ($type !== 'image' && !empty($url_parts['stream']) && !empty($url_parts['path'])) {
                  $url_parts['path'] = Grav::instance()['base_url_relative'] . '/' . static::resolveStream("{$url_parts['scheme']}://{$url_parts['path']}");
                  unset($url_parts['stream'], $url_parts['scheme']);
              }

              $excerpt['element']['attributes']['href'] = Uri::buildUrl($url_parts);

              return $excerpt;
          }

          // Handle paths and such.
          $url_parts = Uri::convertUrl($page, $url_parts, $type);

          // Build the URL from the component parts and set it on the element.
          $excerpt['element']['attributes']['href'] = Uri::buildUrl($url_parts);

          return $excerpt;
      }

      /**
       * Process an image excerpt
       *
       * Looks for a media object matching the `src` attribute (on the given page, on
       * another page resolved from the URL folder, or through the `onMediaLocate`
       * event). When one is found the whole element is replaced by the medium's
       * parsedown element; otherwise only `src` is rebuilt from the parsed URL.
       *
       * @param array $excerpt
       * @param PageInterface $page
       * @return array The processed excerpt; returned unchanged when the URL
       *               cannot be parsed
       */
      public static function processImageExcerpt(array $excerpt, PageInterface $page)
      {
          $src = $excerpt['element']['attributes']['src'] ?? '';
          $url = htmlspecialchars_decode(urldecode((string) $src));
          $url_parts = static::parseUrl($url);

          // parseUrl() is documented as array|bool; leave the excerpt alone on failure.
          if (!is_array($url_parts)) {
              return $excerpt;
          }

          $media = null;
          $filename = null;

          if (!empty($url_parts['stream'])) {
              // Stream URLs are looked up in the page media under their full stream name.
              $filename = $url_parts['scheme'] . '://' . ($url_parts['path'] ?? '');
              $media = $page->getMedia();
          } else {
              $grav = Grav::instance();

              // File is also local if scheme is http(s) and host matches.
              $local_file = isset($url_parts['path'])
                  && (empty($url_parts['scheme']) || in_array($url_parts['scheme'], ['http', 'https'], true))
                  && (empty($url_parts['host']) || $url_parts['host'] === $grav['uri']->host());

              if ($local_file) {
                  $filename = basename($url_parts['path']);
                  $folder = dirname($url_parts['path']);

                  // Get the local path to page media if possible.
                  if ($folder === $page->url(false, false, false)) {
                      // Get the media objects for this page.
                      $media = $page->getMedia();
                  } else {
                      // see if this is an external page to this one
                      $base_url = rtrim($grav['base_url_relative'] . $grav['pages']->base(), '/');
                      $page_route = '/' . ltrim(str_replace($base_url, '', $folder), '/');

                      /** @var PageInterface $ext_page */
                      $ext_page = $grav['pages']->dispatch($page_route, true);
                      if ($ext_page) {
                          $media = $ext_page->getMedia();
                      } else {
                          // $media is handed over by reference, so event listeners can
                          // presumably supply a media collection for this route.
                          $grav->fireEvent('onMediaLocate', new Event(['route' => $page_route, 'media' => &$media]));
                      }
                  }
              }
          }

          // If there is a media file that matches the path referenced..
          if ($media && $filename && isset($media[$filename])) {
              // Get the medium object.
              /** @var Medium $medium */
              $medium = $media[$filename];

              // Process operations
              $medium = static::processMediaActions($medium, $url_parts);

              $element_excerpt = $excerpt['element']['attributes'];
              $alt = $element_excerpt['alt'] ?? '';
              $title = $element_excerpt['title'] ?? '';
              $class = $element_excerpt['class'] ?? '';
              $id = $element_excerpt['id'] ?? '';

              $excerpt['element'] = $medium->parsedownElement($title, $alt, $class, $id, true);
          } else {
              // Not a current page media file, see if it needs converting to relative.
              $excerpt['element']['attributes']['src'] = Uri::buildUrl($url_parts);
          }

          return $excerpt;
      }

      /**
       * Process media actions
       *
       * Every query parameter of the URL is invoked as a method on the medium with
       * its comma-separated value as arguments (a `[a,b]` value is passed as a single
       * array argument). `fixOrientation` and the `system.images.defaults` actions
       * from the configuration are appended after the URL actions.
       *
       * NOTE(review): method names come straight from the URL query, so any method
       * callable on the medium can be triggered - confirm the medium restricts this.
       *
       * @param Medium $medium
       * @param string|array $url URL string, or URL parts as returned by parse_url()
       * @return Medium
       */
      public static function processMediaActions($medium, $url)
      {
          if (is_array($url)) {
              $url_parts = $url;
          } else {
              // parse_url() returns false on seriously malformed URLs.
              $url_parts = parse_url($url) ?: [];
          }

          $actions = [];

          // if there is a query, then parse it and build action calls
          if (isset($url_parts['query'])) {
              $actions = array_reduce(explode('&', $url_parts['query']), function ($carry, $item) {
                  $parts = explode('=', $item, 2);
                  $value = $parts[1] ?? null;
                  $carry[] = ['method' => $parts[0], 'params' => $value];

                  return $carry;
              }, []);
          }

          $config = Grav::instance()['config'];

          if ($config->get('system.images.auto_fix_orientation')) {
              $actions[] = ['method' => 'fixOrientation', 'params' => ''];
          }

          $defaults = $config->get('system.images.defaults');
          if (is_array($defaults) && count($defaults)) {
              foreach ($defaults as $method => $params) {
                  $actions[] = [
                      'method' => $method,
                      'params' => $params,
                  ];
              }
          }

          // loop through actions for the image and call them
          foreach ($actions as $action) {
              // A parameter without a value arrives as null; cast so preg_match() and
              // explode() always receive a string (null is deprecated since PHP 8.1).
              $params = (string) ($action['params'] ?? '');
              $matches = [];

              if (preg_match('/\[(.*)\]/', $params, $matches)) {
                  // Bracketed list: pass the items as ONE array argument.
                  $args = [explode(',', $matches[1])];
              } else {
                  $args = explode(',', $params);
              }

              // Each action returns the medium to continue with.
              $medium = $medium->{$action['method']}(...$args);
          }

          if (isset($url_parts['fragment'])) {
              $medium->urlHash($url_parts['fragment']);
          }

          return $medium;
      }

      /**
       * Variation of parse_url() which works also with local streams.
       *
       * When the scheme is a stream known to the resource locator, host and path are
       * merged into `path` and a `stream => true` flag is added to the result.
       *
       * @param string $url
       * @return array|bool
       */
      protected static function parseUrl($url)
      {
          $url_parts = Utils::multibyteParseUrl($url);

          if (isset($url_parts['scheme'])) {
              /** @var UniformResourceLocator $locator */
              $locator = Grav::instance()['locator'];

              // Special handling for the streams.
              if ($locator->schemeExists($url_parts['scheme'])) {
                  if (isset($url_parts['host'])) {
                      // Merge host and path into a path.
                      // NOTE(review): if the parsed path already starts with '/', this
                      // yields a double slash - confirm that consumers tolerate it.
                      $url_parts['path'] = $url_parts['host'] . (isset($url_parts['path']) ? '/' . $url_parts['path'] : '');
                      unset($url_parts['host']);
                  }

                  $url_parts['stream'] = true;
              }
          }

          return $url_parts;
      }

      /**
       * Resolve a stream URL through the resource locator.
       *
       * @param string $url
       * @return bool|string The located resource (the second lookup is only tried
       *                     when the first finds nothing), or the URL itself when
       *                     it is not a stream
       */
      protected static function resolveStream($url)
      {
          /** @var UniformResourceLocator $locator */
          $locator = Grav::instance()['locator'];

          if ($locator->isStream($url)) {
              return $locator->findResource($url, false) ?: $locator->findResource($url, false, true);
          }

          return $url;
      }
  }