Minify.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. <?php
  2. /**
  3. * Abstract minifier class
  4. *
  5. * Please report bugs on https://github.com/matthiasmullie/minify/issues
  6. *
  7. * @author Matthias Mullie <minify@mullie.eu>
  8. * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
  9. * @license MIT License
  10. */
  11. namespace MatthiasMullie\Minify;
  12. use MatthiasMullie\Minify\Exceptions\IOException;
  13. use Psr\Cache\CacheItemInterface;
  14. /**
  15. * Abstract minifier class.
  16. *
  17. * Please report bugs on https://github.com/matthiasmullie/minify/issues
  18. *
  19. * @package Minify
  20. * @author Matthias Mullie <minify@mullie.eu>
  21. * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
  22. * @license MIT License
  23. */
  24. abstract class Minify
  25. {
  26. /**
  27. * The data to be minified.
  28. *
  29. * @var string[]
  30. */
  31. protected $data = array();
  32. /**
  33. * Array of patterns to match.
  34. *
  35. * @var string[]
  36. */
  37. protected $patterns = array();
  38. /**
  39. * This array will hold content of strings and regular expressions that have
  40. * been extracted from the JS source code, so we can reliably match "code",
  41. * without having to worry about potential "code-like" characters inside.
  42. *
  43. * @var string[]
  44. */
  45. public $extracted = array();
  46. /**
  47. * Init the minify class - optionally, code may be passed along already.
  48. */
  49. public function __construct(/* $data = null, ... */)
  50. {
  51. // it's possible to add the source through the constructor as well ;)
  52. if (func_num_args()) {
  53. call_user_func_array(array($this, 'add'), func_get_args());
  54. }
  55. }
  56. /**
  57. * Add a file or straight-up code to be minified.
  58. *
  59. * @param string|string[] $data
  60. *
  61. * @return static
  62. */
  63. public function add($data /* $data = null, ... */)
  64. {
  65. // bogus "usage" of parameter $data: scrutinizer warns this variable is
  66. // not used (we're using func_get_args instead to support overloading),
  67. // but it still needs to be defined because it makes no sense to have
  68. // this function without argument :)
  69. $args = array($data) + func_get_args();
  70. // this method can be overloaded
  71. foreach ($args as $data) {
  72. if (is_array($data)) {
  73. call_user_func_array(array($this, 'add'), $data);
  74. continue;
  75. }
  76. // redefine var
  77. $data = (string) $data;
  78. // load data
  79. $value = $this->load($data);
  80. $key = ($data != $value) ? $data : count($this->data);
  81. // replace CR linefeeds etc.
  82. // @see https://github.com/matthiasmullie/minify/pull/139
  83. $value = str_replace(array("\r\n", "\r"), "\n", $value);
  84. // store data
  85. $this->data[$key] = $value;
  86. }
  87. return $this;
  88. }
  89. /**
  90. * Minify the data & (optionally) saves it to a file.
  91. *
  92. * @param string[optional] $path Path to write the data to
  93. *
  94. * @return string The minified data
  95. */
  96. public function minify($path = null)
  97. {
  98. $content = $this->execute($path);
  99. // save to path
  100. if ($path !== null) {
  101. $this->save($content, $path);
  102. }
  103. return $content;
  104. }
  105. /**
  106. * Minify & gzip the data & (optionally) saves it to a file.
  107. *
  108. * @param string[optional] $path Path to write the data to
  109. * @param int[optional] $level Compression level, from 0 to 9
  110. *
  111. * @return string The minified & gzipped data
  112. */
  113. public function gzip($path = null, $level = 9)
  114. {
  115. $content = $this->execute($path);
  116. $content = gzencode($content, $level, FORCE_GZIP);
  117. // save to path
  118. if ($path !== null) {
  119. $this->save($content, $path);
  120. }
  121. return $content;
  122. }
  123. /**
  124. * Minify the data & write it to a CacheItemInterface object.
  125. *
  126. * @param CacheItemInterface $item Cache item to write the data to
  127. *
  128. * @return CacheItemInterface Cache item with the minifier data
  129. */
  130. public function cache(CacheItemInterface $item)
  131. {
  132. $content = $this->execute();
  133. $item->set($content);
  134. return $item;
  135. }
  136. /**
  137. * Minify the data.
  138. *
  139. * @param string[optional] $path Path to write the data to
  140. *
  141. * @return string The minified data
  142. */
  143. abstract public function execute($path = null);
  144. /**
  145. * Load data.
  146. *
  147. * @param string $data Either a path to a file or the content itself
  148. *
  149. * @return string
  150. */
  151. protected function load($data)
  152. {
  153. // check if the data is a file
  154. if ($this->canImportFile($data)) {
  155. $data = file_get_contents($data);
  156. // strip BOM, if any
  157. if (substr($data, 0, 3) == "\xef\xbb\xbf") {
  158. $data = substr($data, 3);
  159. }
  160. }
  161. return $data;
  162. }
  163. /**
  164. * Save to file.
  165. *
  166. * @param string $content The minified data
  167. * @param string $path The path to save the minified data to
  168. *
  169. * @throws IOException
  170. */
  171. protected function save($content, $path)
  172. {
  173. $handler = $this->openFileForWriting($path);
  174. $this->writeToFile($handler, $content);
  175. @fclose($handler);
  176. }
  177. /**
  178. * Register a pattern to execute against the source content.
  179. *
  180. * @param string $pattern PCRE pattern
  181. * @param string|callable $replacement Replacement value for matched pattern
  182. */
  183. protected function registerPattern($pattern, $replacement = '')
  184. {
  185. // study the pattern, we'll execute it more than once
  186. $pattern .= 'S';
  187. $this->patterns[] = array($pattern, $replacement);
  188. }
  189. /**
  190. * We can't "just" run some regular expressions against JavaScript: it's a
  191. * complex language. E.g. having an occurrence of // xyz would be a comment,
  192. * unless it's used within a string. Of you could have something that looks
  193. * like a 'string', but inside a comment.
  194. * The only way to accurately replace these pieces is to traverse the JS one
  195. * character at a time and try to find whatever starts first.
  196. *
  197. * @param string $content The content to replace patterns in
  198. *
  199. * @return string The (manipulated) content
  200. */
  201. protected function replace($content)
  202. {
  203. $processed = '';
  204. $positions = array_fill(0, count($this->patterns), -1);
  205. $matches = array();
  206. while ($content) {
  207. // find first match for all patterns
  208. foreach ($this->patterns as $i => $pattern) {
  209. list($pattern, $replacement) = $pattern;
  210. // no need to re-run matches that are still in the part of the
  211. // content that hasn't been processed
  212. if ($positions[$i] >= 0) {
  213. continue;
  214. }
  215. $match = null;
  216. if (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE)) {
  217. $matches[$i] = $match;
  218. // we'll store the match position as well; that way, we
  219. // don't have to redo all preg_matches after changing only
  220. // the first (we'll still know where those others are)
  221. $positions[$i] = $match[0][1];
  222. } else {
  223. // if the pattern couldn't be matched, there's no point in
  224. // executing it again in later runs on this same content;
  225. // ignore this one until we reach end of content
  226. unset($matches[$i]);
  227. $positions[$i] = strlen($content);
  228. }
  229. }
  230. // no more matches to find: everything's been processed, break out
  231. if (!$matches) {
  232. $processed .= $content;
  233. break;
  234. }
  235. // see which of the patterns actually found the first thing (we'll
  236. // only want to execute that one, since we're unsure if what the
  237. // other found was not inside what the first found)
  238. $discardLength = min($positions);
  239. $firstPattern = array_search($discardLength, $positions);
  240. $match = $matches[$firstPattern][0][0];
  241. // execute the pattern that matches earliest in the content string
  242. list($pattern, $replacement) = $this->patterns[$firstPattern];
  243. $replacement = $this->replacePattern($pattern, $replacement, $content);
  244. // figure out which part of the string was unmatched; that's the
  245. // part we'll execute the patterns on again next
  246. $content = (string) substr($content, $discardLength);
  247. $unmatched = (string) substr($content, strpos($content, $match) + strlen($match));
  248. // move the replaced part to $processed and prepare $content to
  249. // again match batch of patterns against
  250. $processed .= substr($replacement, 0, strlen($replacement) - strlen($unmatched));
  251. $content = $unmatched;
  252. // first match has been replaced & that content is to be left alone,
  253. // the next matches will start after this replacement, so we should
  254. // fix their offsets
  255. foreach ($positions as $i => $position) {
  256. $positions[$i] -= $discardLength + strlen($match);
  257. }
  258. }
  259. return $processed;
  260. }
  261. /**
  262. * This is where a pattern is matched against $content and the matches
  263. * are replaced by their respective value.
  264. * This function will be called plenty of times, where $content will always
  265. * move up 1 character.
  266. *
  267. * @param string $pattern Pattern to match
  268. * @param string|callable $replacement Replacement value
  269. * @param string $content Content to match pattern against
  270. *
  271. * @return string
  272. */
  273. protected function replacePattern($pattern, $replacement, $content)
  274. {
  275. if (is_callable($replacement)) {
  276. return preg_replace_callback($pattern, $replacement, $content, 1, $count);
  277. } else {
  278. return preg_replace($pattern, $replacement, $content, 1, $count);
  279. }
  280. }
  281. /**
  282. * Strings are a pattern we need to match, in order to ignore potential
  283. * code-like content inside them, but we just want all of the string
  284. * content to remain untouched.
  285. *
  286. * This method will replace all string content with simple STRING#
  287. * placeholder text, so we've rid all strings from characters that may be
  288. * misinterpreted. Original string content will be saved in $this->extracted
  289. * and after doing all other minifying, we can restore the original content
  290. * via restoreStrings().
  291. *
  292. * @param string[optional] $chars
  293. * @param string[optional] $placeholderPrefix
  294. */
  295. protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
  296. {
  297. // PHP only supports $this inside anonymous functions since 5.4
  298. $minifier = $this;
  299. $callback = function ($match) use ($minifier, $placeholderPrefix) {
  300. // check the second index here, because the first always contains a quote
  301. if ($match[2] === '') {
  302. /*
  303. * Empty strings need no placeholder; they can't be confused for
  304. * anything else anyway.
  305. * But we still needed to match them, for the extraction routine
  306. * to skip over this particular string.
  307. */
  308. return $match[0];
  309. }
  310. $count = count($minifier->extracted);
  311. $placeholder = $match[1].$placeholderPrefix.$count.$match[1];
  312. $minifier->extracted[$placeholder] = $match[1].$match[2].$match[1];
  313. return $placeholder;
  314. };
  315. /*
  316. * The \\ messiness explained:
  317. * * Don't count ' or " as end-of-string if it's escaped (has backslash
  318. * in front of it)
  319. * * Unless... that backslash itself is escaped (another leading slash),
  320. * in which case it's no longer escaping the ' or "
  321. * * So there can be either no backslash, or an even number
  322. * * multiply all of that times 4, to account for the escaping that has
  323. * to be done to pass the backslash into the PHP string without it being
  324. * considered as escape-char (times 2) and to get it in the regex,
  325. * escaped (times 2)
  326. */
  327. $this->registerPattern('/(['.$chars.'])(.*?(?<!\\\\)(\\\\\\\\)*+)\\1/s', $callback);
  328. }
  329. /**
  330. * This method will restore all extracted data (strings, regexes) that were
  331. * replaced with placeholder text in extract*(). The original content was
  332. * saved in $this->extracted.
  333. *
  334. * @param string $content
  335. *
  336. * @return string
  337. */
  338. protected function restoreExtractedData($content)
  339. {
  340. if (!$this->extracted) {
  341. // nothing was extracted, nothing to restore
  342. return $content;
  343. }
  344. $content = strtr($content, $this->extracted);
  345. $this->extracted = array();
  346. return $content;
  347. }
  348. /**
  349. * Check if the path is a regular file and can be read.
  350. *
  351. * @param string $path
  352. *
  353. * @return bool
  354. */
  355. protected function canImportFile($path)
  356. {
  357. $parsed = parse_url($path);
  358. if (
  359. // file is elsewhere
  360. isset($parsed['host']) ||
  361. // file responds to queries (may change, or need to bypass cache)
  362. isset($parsed['query'])
  363. ) {
  364. return false;
  365. }
  366. return strlen($path) < PHP_MAXPATHLEN && @is_file($path) && is_readable($path);
  367. }
  368. /**
  369. * Attempts to open file specified by $path for writing.
  370. *
  371. * @param string $path The path to the file
  372. *
  373. * @return resource Specifier for the target file
  374. *
  375. * @throws IOException
  376. */
  377. protected function openFileForWriting($path)
  378. {
  379. if (($handler = @fopen($path, 'w')) === false) {
  380. throw new IOException('The file "'.$path.'" could not be opened for writing. Check if PHP has enough permissions.');
  381. }
  382. return $handler;
  383. }
  384. /**
  385. * Attempts to write $content to the file specified by $handler. $path is used for printing exceptions.
  386. *
  387. * @param resource $handler The resource to write to
  388. * @param string $content The content to write
  389. * @param string $path The path to the file (for exception printing only)
  390. *
  391. * @throws IOException
  392. */
  393. protected function writeToFile($handler, $content, $path = '')
  394. {
  395. if (($result = @fwrite($handler, $content)) === false || ($result < strlen($content))) {
  396. throw new IOException('The file "'.$path.'" could not be written to. Check your disk space and file permissions.');
  397. }
  398. }
  399. }