Minify.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. <?php
  2. /**
  3. * Abstract minifier class
  4. *
  5. * Please report bugs on https://github.com/matthiasmullie/minify/issues
  6. *
  7. * @author Matthias Mullie <minify@mullie.eu>
  8. * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
  9. * @license MIT License
  10. */
  11. namespace MatthiasMullie\Minify;
  12. use MatthiasMullie\Minify\Exceptions\IOException;
  13. use Psr\Cache\CacheItemInterface;
  /**
   * Abstract minifier class.
   *
   * Collects source snippets (inline code or paths to files) through add(),
   * leaves the actual transformation to the concrete subclass' execute(), and
   * offers helpers to write the result to a file, gzip it, or put it in a cache
   * item.
   *
   * Please report bugs on https://github.com/matthiasmullie/minify/issues
   *
   * @package Minify
   * @author Matthias Mullie <minify@mullie.eu>
   * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
   * @license MIT License
   */
  abstract class Minify
  {
      /**
       * The data to be minified.
       *
       * Keyed by the source path when the entry was loaded from a file, or by a
       * numeric index when the entry was passed in as straight-up code.
       *
       * @var string[]
       */
      protected $data = array();

      /**
       * Array of patterns to match.
       *
       * Every entry is a 2-element array: the PCRE pattern and its replacement
       * (a string or a callable), as stored by registerPattern().
       *
       * @var array[]
       */
      protected $patterns = array();

      /**
       * This array will hold content of strings and regular expressions that have
       * been extracted from the JS source code, so we can reliably match "code",
       * without having to worry about potential "code-like" characters inside.
       *
       * Maps placeholder => original content. It is public because the closure
       * built in extractStrings() writes to it through a captured reference to
       * this object rather than through $this.
       *
       * @var string[]
       */
      public $extracted = array();

      /**
       * Init the minify class - optionally, code may be passed along already.
       *
       * Every argument is forwarded to add().
       */
      public function __construct(/* $data = null, ... */)
      {
          // it's possible to add the source through the constructor as well ;)
          if (func_num_args()) {
              call_user_func_array(array($this, 'add'), func_get_args());
          }
      }

      /**
       * Add a file or straight-up code to be minified.
       *
       * Accepts any number of arguments; each one is either a string (code or a
       * path to a readable file) or an array of such values, which may be nested.
       *
       * @param string|string[] $data
       *
       * @return static
       */
      public function add($data /* $data = null, ... */)
      {
          // $data is declared so this method can't be called without any input,
          // but func_get_args() is what makes the variadic call style work
          $args = array($data) + func_get_args();

          foreach ($args as $data) {
              if (is_array($data)) {
                  // Add the elements one by one instead of spreading the array
                  // into a call: spreading an empty array would call add()
                  // without arguments, and string keys would be interpreted as
                  // named arguments on PHP 8.
                  foreach ($data as $item) {
                      $this->add($item);
                  }
                  continue;
              }

              // redefine var
              $data = (string) $data;

              // load data; if the loaded value differs from the input, the input
              // was a path, so it serves as the key
              $value = $this->load($data);
              $key = ($data !== $value) ? $data : count($this->data);

              // replace CR linefeeds etc.
              // @see https://github.com/matthiasmullie/minify/pull/139
              $value = str_replace(array("\r\n", "\r"), "\n", $value);

              // store data
              $this->data[$key] = $value;
          }

          return $this;
      }

      /**
       * Minify the data & (optionally) saves it to a file.
       *
       * @param string[optional] $path Path to write the data to
       *
       * @return string The minified data
       *
       * @throws IOException
       */
      public function minify($path = null)
      {
          $content = $this->execute($path);

          // save to path
          if ($path !== null) {
              $this->save($content, $path);
          }

          return $content;
      }

      /**
       * Minify & gzip the data & (optionally) saves it to a file.
       *
       * @param string[optional] $path  Path to write the data to
       * @param int[optional]    $level Compression level, from 0 to 9
       *
       * @return string The minified & gzipped data
       *
       * @throws IOException
       */
      public function gzip($path = null, $level = 9)
      {
          $content = $this->execute($path);
          $content = gzencode($content, $level, FORCE_GZIP);

          // save to path
          if ($path !== null) {
              $this->save($content, $path);
          }

          return $content;
      }

      /**
       * Minify the data & write it to a CacheItemInterface object.
       *
       * @param CacheItemInterface $item Cache item to write the data to
       *
       * @return CacheItemInterface Cache item with the minifier data
       */
      public function cache(CacheItemInterface $item)
      {
          $content = $this->execute();
          $item->set($content);

          return $item;
      }

      /**
       * Minify the data.
       *
       * @param string[optional] $path Path to write the data to
       *
       * @return string The minified data
       */
      abstract public function execute($path = null);

      /**
       * Load data.
       *
       * When $data points to an importable file (see canImportFile()), the file
       * content is returned with a leading UTF-8 BOM removed; otherwise $data is
       * returned untouched.
       *
       * @param string $data Either a path to a file or the content itself
       *
       * @return string
       */
      protected function load($data)
      {
          // check if the data is a file
          if ($this->canImportFile($data)) {
              $data = file_get_contents($data);

              // strip BOM, if any
              if (substr($data, 0, 3) === "\xef\xbb\xbf") {
                  $data = substr($data, 3);
              }
          }

          return $data;
      }

      /**
       * Save to file.
       *
       * @param string $content The minified data
       * @param string $path    The path to save the minified data to
       *
       * @throws IOException
       */
      protected function save($content, $path)
      {
          $handler = $this->openFileForWriting($path);

          try {
              // pass $path along so a write failure names the file involved
              $this->writeToFile($handler, $content, $path);
          } catch (\Exception $e) {
              // don't leak the handle when writing failed
              @fclose($handler);

              throw $e;
          }

          @fclose($handler);
      }

      /**
       * Register a pattern to execute against the source content.
       *
       * @param string          $pattern     PCRE pattern
       * @param string|callable $replacement Replacement value for matched pattern
       */
      protected function registerPattern($pattern, $replacement = '')
      {
          // study the pattern, we'll execute it more than once
          $pattern .= 'S';

          $this->patterns[] = array($pattern, $replacement);
      }

      /**
       * We can't "just" run some regular expressions against JavaScript: it's a
       * complex language. E.g. having an occurrence of // xyz would be a comment,
       * unless it's used within a string. Or you could have something that looks
       * like a 'string', but inside a comment.
       * The only way to accurately replace these pieces is to repeatedly find
       * whichever registered pattern matches first, replace just that match, and
       * continue with the content right after it.
       *
       * @param string $content The content to replace patterns in
       *
       * @return string The (manipulated) content
       */
      protected function replace($content)
      {
          $content = (string) $content;
          $processed = '';
          $positions = array_fill(0, count($this->patterns), -1);
          $matches = array();

          // compare against '' explicitly: a plain truthiness check would stop
          // early (and drop content) when the remaining content is "0"
          while ($content !== '') {
              // find first match for all patterns
              foreach ($this->patterns as $i => $pattern) {
                  list($pattern, $replacement) = $pattern;

                  // we can safely ignore patterns for positions we've unset
                  // earlier, because we know these won't show up anymore
                  if (!isset($positions[$i])) {
                      continue;
                  }

                  // no need to re-run matches that are still in the part of the
                  // content that hasn't been processed
                  if ($positions[$i] >= 0) {
                      continue;
                  }

                  $match = null;
                  if (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE)) {
                      $matches[$i] = $match;

                      // we'll store the match position as well; that way, we
                      // don't have to redo all preg_matches after changing only
                      // the first (we'll still know where those others are)
                      $positions[$i] = $match[0][1];
                  } else {
                      // if the pattern couldn't be matched, there's no point in
                      // executing it again in later runs on this same content;
                      // ignore this one until we reach end of content
                      unset($matches[$i], $positions[$i]);
                  }
              }

              // no more matches to find: everything's been processed, break out
              if (!$matches) {
                  $processed .= $content;
                  break;
              }

              // see which of the patterns actually found the first thing (we'll
              // only want to execute that one, since we're unsure if what the
              // other found was not inside what the first found)
              $discardLength = min($positions);
              $firstPattern = array_search($discardLength, $positions, true);
              $match = $matches[$firstPattern][0][0];

              // execute the pattern that matches earliest in the content string
              list($pattern, $replacement) = $this->patterns[$firstPattern];
              $replacement = $this->replacePattern($pattern, $replacement, $content);

              // figure out which part of the string was unmatched; that's the
              // part we'll execute the patterns on again next
              $content = (string) substr($content, $discardLength);
              $unmatched = (string) substr($content, strpos($content, $match) + strlen($match));

              // move the replaced part to $processed and prepare $content to
              // again match batch of patterns against
              $processed .= substr($replacement, 0, strlen($replacement) - strlen($unmatched));
              $content = $unmatched;

              // first match has been replaced & that content is to be left alone,
              // the next matches will start after this replacement, so we should
              // fix their offsets
              foreach ($positions as $i => $position) {
                  $positions[$i] -= $discardLength + strlen($match);
              }
          }

          return $processed;
      }

      /**
       * This is where a pattern is matched against $content and only its first
       * match is replaced by the replacement value.
       *
       * Note that is_callable() decides which replace function is used, so a
       * replacement string that happens to name an existing function is invoked
       * as a callback.
       *
       * @param string          $pattern     Pattern to match
       * @param string|callable $replacement Replacement value
       * @param string          $content     Content to match pattern against
       *
       * @return string
       */
      protected function replacePattern($pattern, $replacement, $content)
      {
          if (is_callable($replacement)) {
              return preg_replace_callback($pattern, $replacement, $content, 1);
          }

          return preg_replace($pattern, $replacement, $content, 1);
      }

      /**
       * Strings are a pattern we need to match, in order to ignore potential
       * code-like content inside them, but we just want all of the string
       * content to remain untouched.
       *
       * This method registers a pattern that replaces all (non-empty) string
       * content with a simple numbered placeholder, so we've rid all strings from
       * characters that may be misinterpreted. Original string content will be
       * saved in $this->extracted and after doing all other minifying, we can
       * restore the original content via restoreExtractedData().
       *
       * @param string[optional] $chars             Quote characters that delimit strings
       * @param string[optional] $placeholderPrefix Text put in front of the placeholder number
       */
      protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
      {
          // PHP only supports $this inside anonymous functions since 5.4
          $minifier = $this;
          $callback = function ($match) use ($minifier, $placeholderPrefix) {
              // check the second index here, because the first always contains a quote
              if ($match[2] === '') {
                  /*
                   * Empty strings need no placeholder; they can't be confused for
                   * anything else anyway.
                   * But we still needed to match them, for the extraction routine
                   * to skip over this particular string.
                   */
                  return $match[0];
              }

              $count = count($minifier->extracted);
              $placeholder = $match[1].$placeholderPrefix.$count.$match[1];
              $minifier->extracted[$placeholder] = $match[1].$match[2].$match[1];

              return $placeholder;
          };

          /*
           * The \\ messiness explained:
           * * Don't count ' or " as end-of-string if it's escaped (has backslash
           * in front of it)
           * * Unless... that backslash itself is escaped (another leading slash),
           * in which case it's no longer escaping the ' or "
           * * So there can be either no backslash, or an even number
           * * multiply all of that times 4, to account for the escaping that has
           * to be done to pass the backslash into the PHP string without it being
           * considered as escape-char (times 2) and to get it in the regex,
           * escaped (times 2)
           */
          $this->registerPattern('/(['.$chars.'])(.*?(?<!\\\\)(\\\\\\\\)*+)\\1/s', $callback);
      }

      /**
       * This method will restore all extracted data (strings, regexes) that were
       * replaced with placeholder text in extract*(). The original content was
       * saved in $this->extracted, which is emptied afterwards.
       *
       * @param string $content
       *
       * @return string
       */
      protected function restoreExtractedData($content)
      {
          if (!$this->extracted) {
              // nothing was extracted, nothing to restore
              return $content;
          }

          $content = strtr($content, $this->extracted);

          $this->extracted = array();

          return $content;
      }

      /**
       * Check if the path is a regular file and can be read.
       *
       * @param string $path
       *
       * @return bool
       */
      protected function canImportFile($path)
      {
          // $path may just as well be source code; a NUL byte can never be part
          // of a real path and, depending on the PHP version, filesystem
          // functions reject it with an error instead of returning false
          if (strpos($path, "\0") !== false) {
              return false;
          }

          $parsed = parse_url($path);
          if (
              // file is elsewhere
              isset($parsed['host']) ||
              // file responds to queries (may change, or need to bypass cache)
              isset($parsed['query'])
          ) {
              return false;
          }

          return strlen($path) < PHP_MAXPATHLEN && @is_file($path) && is_readable($path);
      }

      /**
       * Attempts to open file specified by $path for writing.
       *
       * @param string $path The path to the file
       *
       * @return resource Specifier for the target file
       *
       * @throws IOException
       */
      protected function openFileForWriting($path)
      {
          if (($handler = @fopen($path, 'w')) === false) {
              throw new IOException('The file "'.$path.'" could not be opened for writing. Check if PHP has enough permissions.');
          }

          return $handler;
      }

      /**
       * Attempts to write $content to the file specified by $handler. $path is used for printing exceptions.
       *
       * A short write (fewer bytes written than $content holds) is treated as a
       * failure as well.
       *
       * @param resource $handler The resource to write to
       * @param string   $content The content to write
       * @param string   $path    The path to the file (for exception printing only)
       *
       * @throws IOException
       */
      protected function writeToFile($handler, $content, $path = '')
      {
          if (($result = @fwrite($handler, $content)) === false || ($result < strlen($content))) {
              throw new IOException('The file "'.$path.'" could not be written to. Check your disk space and file permissions.');
          }
      }
  }
  403. }