stylesheet.cls.php 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419
  1. <?php
  2. /**
  3. * @package dompdf
  4. * @link http://dompdf.github.com/
  5. * @author Benj Carson <benjcarson@digitaljunkies.ca>
  6. * @author Helmut Tischer <htischer@weihenstephan.org>
  7. * @author Fabien Ménager <fabien.menager@gmail.com>
  8. * @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License
  9. */
  /**
   * Path of the built-in default CSS file: "res/html.css" below DOMPDF_LIB_DIR.
   * Also exposed as {@link Stylesheet::DEFAULT_STYLESHEET}.
   */
  define('__DEFAULT_STYLESHEET', implode(DIRECTORY_SEPARATOR, array(DOMPDF_LIB_DIR, "res", "html.css")));
  15. /**
  16. * The master stylesheet class
  17. *
  18. * The Stylesheet class is responsible for parsing stylesheets and style
  19. * tags/attributes. It also acts as a registry of the individual Style
  20. * objects generated by the current set of loaded CSS files and style
  21. * elements.
  22. *
  23. * @see Style
  24. * @package dompdf
  25. */
  26. class Stylesheet {
  27. /**
  28. * The location of the default built-in CSS file.
  29. */
  30. const DEFAULT_STYLESHEET = __DEFAULT_STYLESHEET;
  31. /**
  32. * User agent stylesheet origin
  33. *
  34. * @var int
  35. */
  36. const ORIG_UA = 1;
  37. /**
  38. * User normal stylesheet origin
  39. *
  40. * @var int
  41. */
  42. const ORIG_USER = 2;
  43. /**
  44. * Author normal stylesheet origin
  45. *
  46. * @var int
  47. */
  48. const ORIG_AUTHOR = 3;
  49. private static $_stylesheet_origins = array(
  50. self::ORIG_UA => -0x0FFFFFFF, // user agent style sheets
  51. self::ORIG_USER => -0x0000FFFF, // user normal style sheets
  52. self::ORIG_AUTHOR => 0x00000000, // author normal style sheets
  53. );
  54. /**
  55. * Current dompdf instance
  56. *
  57. * @var DOMPDF
  58. */
  59. private $_dompdf;
  60. /**
  61. * Array of currently defined styles
  62. *
  63. * @var Style[]
  64. */
  65. private $_styles;
  66. /**
  67. * Base protocol of the document being parsed
  68. * Used to handle relative urls.
  69. *
  70. * @var string
  71. */
  72. private $_protocol;
  73. /**
  74. * Base hostname of the document being parsed
  75. * Used to handle relative urls.
  76. *
  77. * @var string
  78. */
  79. private $_base_host;
  80. /**
  81. * Base path of the document being parsed
  82. * Used to handle relative urls.
  83. *
  84. * @var string
  85. */
  86. private $_base_path;
  87. /**
  88. * The styles defined by @page rules
  89. *
  90. * @var array<Style>
  91. */
  92. private $_page_styles;
  93. /**
  94. * List of loaded files, used to prevent recursion
  95. *
  96. * @var array
  97. */
  98. private $_loaded_files;
  99. /**
  100. * Current stylesheet origin
  101. *
  102. * @var int
  103. */
  104. private $_current_origin = self::ORIG_UA;
  105. /**
  106. * Accepted CSS media types
  107. * List of types and parsing rules for future extensions:
  108. * http://www.w3.org/TR/REC-html40/types.html
  109. * screen, tty, tv, projection, handheld, print, braille, aural, all
  110. * The following are non standard extensions for undocumented specific environments.
  111. * static, visual, bitmap, paged, dompdf
  112. * Note, even though the generated pdf file is intended for print output,
  113. * the desired content might be different (e.g. screen or projection view of html file).
  114. * Therefore allow specification of content by dompdf setting DOMPDF_DEFAULT_MEDIA_TYPE.
  115. * If given, replace media "print" by DOMPDF_DEFAULT_MEDIA_TYPE.
  116. * (Previous version $ACCEPTED_MEDIA_TYPES = $ACCEPTED_GENERIC_MEDIA_TYPES + $ACCEPTED_DEFAULT_MEDIA_TYPE)
  117. */
  118. static $ACCEPTED_DEFAULT_MEDIA_TYPE = "print";
  119. static $ACCEPTED_GENERIC_MEDIA_TYPES = array("all", "static", "visual", "bitmap", "paged", "dompdf");
  /**
   * Create an empty stylesheet bound to a DOMPDF instance.
   *
   * The style registry and the list of loaded files start out empty, and
   * the page styles contain a single unset "base" entry. The base protocol,
   * host and path are taken from whatever explode_url() extracts from the
   * currently running script ($_SERVER["SCRIPT_FILENAME"]).
   *
   * @param DOMPDF $dompdf the owning DOMPDF instance
   */
  function __construct(DOMPDF $dompdf) {
    $this->_dompdf       = $dompdf;
    $this->_styles       = array();
    $this->_page_styles  = array("base" => null);
    $this->_loaded_files = array();

    $script_url = explode_url($_SERVER["SCRIPT_FILENAME"]);
    list($this->_protocol, $this->_base_host, $this->_base_path) = $script_url;
  }
  /**
   * Destructor: hands this object to the clear_object() helper.
   *
   * NOTE(review): clear_object() is defined outside this file; presumably it
   * releases the object's members - confirm against its definition.
   */
  function __destruct() {
    clear_object($this);
  }
  /**
   * Override the base protocol used when resolving relative URLs.
   *
   * @param string $protocol
   */
  function set_protocol($protocol) {
    $this->_protocol = $protocol;
  }
  /**
   * Override the base host used when resolving relative URLs.
   *
   * @param string $host
   */
  function set_host($host) {
    $this->_base_host = $host;
  }
  /**
   * Override the base path used when resolving relative URLs.
   *
   * @param string $path
   */
  function set_base_path($path) {
    $this->_base_path = $path;
  }
  /**
   * Get the DOMPDF instance this stylesheet was created for.
   *
   * @return DOMPDF
   */
  function get_dompdf() {
    return $this->_dompdf;
  }
  /**
   * Get the base protocol currently used to resolve relative URLs.
   *
   * @return string
   */
  function get_protocol() {
    return $this->_protocol;
  }
  /**
   * Get the base host currently used to resolve relative URLs.
   *
   * @return string
   */
  function get_host() {
    return $this->_base_host;
  }
  /**
   * Get the base path currently used to resolve relative URLs.
   *
   * @return string
   */
  function get_base_path() {
    return $this->_base_path;
  }
  /**
   * Get the styles collected from @page rules.
   *
   * The array always has a "base" key, which the constructor initialises
   * to null.
   *
   * @return Style[]
   */
  function get_page_styles() {
    return $this->_page_styles;
  }
  /**
   * Register a Style under a selector.
   *
   * When the selector is new, a clone of $style is stored; when it is
   * already registered, $style is merged into the stored Style. In both
   * cases the stored Style is then tagged with the origin that is current
   * for this stylesheet.
   *
   * @param string $key   the Style's selector
   * @param Style  $style the Style to be added
   *
   * @throws DOMPDF_Exception if $key is not a string
   */
  function add_style($key, Style $style) {
    if ( is_string($key) === false ) {
      throw new DOMPDF_Exception("CSS rule must be keyed by a string.");
    }

    if ( !isset($this->_styles[$key]) ) {
      // First rule for this selector: keep our own copy
      $this->_styles[$key] = clone $style;
    }
    else {
      $this->_styles[$key]->merge($style);
    }

    $registered = $this->_styles[$key];
    $registered->set_origin($this->_current_origin);
  }
  /**
   * Look up a specific Style object by selector.
   *
   * @param string $key the selector of the requested Style
   * @return Style|null the registered Style, or null when there is none
   */
  function lookup($key) {
    return isset($this->_styles[$key]) ? $this->_styles[$key] : null;
  }
  /**
   * Create a fresh Style attached to this stylesheet and carrying the
   * stylesheet's current origin.
   *
   * @param Style $parent The style of this style's parent in the DOM tree
   *                      (accepted for callers, but not used by this method)
   * @return Style
   */
  function create_style(Style $parent = null) {
    $origin = $this->_current_origin;
    return new Style($this, $origin);
  }
  /**
   * Parse a string of CSS into this stylesheet.
   *
   * @param string $css the CSS source (taken by reference)
   */
  function load_css(&$css) {
    $this->_parse_css($css);
  }
  /**
   * Load and parse a CSS file.
   *
   * $file may be a "data:" URI, a local path or a URL. Each distinct $file
   * is loaded at most once per stylesheet, which also stops circular
   * imports. For anything other than a data URI, the stylesheet's base
   * protocol/host/path are replaced by those of $file.
   *
   * Outside quirks mode a stylesheet fetched over HTTP is rejected unless
   * its Content-Type is text/css. Failures are reported through
   * record_warnings() and the method returns without parsing.
   *
   * @param string $file   location of the stylesheet
   * @param int    $origin one of the ORIG_* constants; a falsy value keeps
   *                       the current origin unchanged
   */
  function load_css_file($file, $origin = self::ORIG_AUTHOR) {
    if ( $origin ) {
      $this->_current_origin = $origin;
    }

    // Prevent circular references
    if ( isset($this->_loaded_files[$file]) ) {
      return;
    }

    $this->_loaded_files[$file] = true;

    if ( strpos($file, "data:") === 0 ) {
      $parsed = parse_data_uri($file);

      // NOTE(review): parse_data_uri() lives outside this file; this guard
      // assumes it yields no "data" entry when the URI cannot be parsed.
      if ( !isset($parsed["data"]) ) {
        record_warnings(E_USER_WARNING, "Unable to parse css data URI", __FILE__, __LINE__);
        return;
      }

      $css = $parsed["data"];
    }
    else {
      $parsed_url = explode_url($file);

      list($this->_protocol, $this->_base_host, $this->_base_path, $filename) = $parsed_url;

      // Fix submitted by Nick Oostveen for aliased directory support:
      if ( $this->_protocol == "" ) {
        $file = $this->_base_path . $filename;
      }
      else {
        $file = build_url($this->_protocol, $this->_base_host, $this->_base_path, $filename);
      }

      set_error_handler("record_warnings");
      // Second argument is the boolean use_include_path flag
      $css = file_get_contents($file, false, $this->_dompdf->get_http_context());
      restore_error_handler();

      $good_mime_type = true;

      // See http://the-stickman.com/web-development/php/getting-http-response-headers-when-using-file_get_contents/
      if ( isset($http_response_header) && !$this->_dompdf->get_quirksmode() ) {
        // When redirects were followed the array holds the headers of every
        // response in the chain, so only the last Content-Type describes
        // the body that was actually received.
        $mime_type = null;

        foreach ( $http_response_header as $_header ) {
          if ( preg_match("@Content-Type:\s*([\w/]+)@i", $_header, $matches) ) {
            // MIME types are case-insensitive
            $mime_type = strtolower($matches[1]);
          }
        }

        if ( $mime_type !== null && $mime_type !== "text/css" ) {
          $good_mime_type = false;
        }
      }

      // $css is false when the read failed; that also compares equal to ""
      if ( !$good_mime_type || $css == "" ) {
        record_warnings(E_USER_WARNING, "Unable to load css file $file", __FILE__, __LINE__);
        return;
      }
    }

    $this->_parse_css($css);
  }
  /**
   * Compute the cascade weight of a selector.
   *
   * The specificity is packed into one integer as four bytes a/b/c/d
   * (inline style / ids / classes and attributes / elements) and the
   * offset registered for the stylesheet origin is added on top, so that
   * a plain numeric sort orders rules for the cascade.
   *
   * Pseudo-classes (":") are ignored, as they are in
   * _css_selector_to_xpath().
   *
   * @link http://www.w3.org/TR/CSS21/cascade.html#specificity
   *
   * @param string $selector the CSS selector, or "!attr" for an inline
   *                         style attribute
   * @param int    $origin   one of self::ORIG_UA, self::ORIG_USER or
   *                         self::ORIG_AUTHOR
   *
   * @return int
   */
  private function _specificity($selector, $origin = self::ORIG_AUTHOR) {
    $a = ($selector === "!attr") ? 1 : 0;

    $b = mb_substr_count($selector, "#");

    $c = mb_substr_count($selector, ".") +
         mb_substr_count($selector, "[");

    $d = mb_substr_count($selector, " ") +
         mb_substr_count($selector, ">") +
         mb_substr_count($selector, "+");

    // If a normal element name is at the beginning of the string, the
    // leading whitespace may have been removed by whitespace collapsing,
    // so there is one combinator less than there are element names.
    // Count that first element explicitly (see _css_selector_to_xpath).
    $first = isset($selector[0]) ? $selector[0] : "";
    if ( !in_array($first, array(" ", ">", ".", "#", "+", ":", "["), true) ) {
      $d++;
    }

    // Each component owns exactly one byte; clamp after all increments so
    // that no component can spill into its neighbour.
    $b = min($b, 255);
    $c = min($c, 255);
    $d = min($d, 255);

    $specificity = ($a << 24) | ($b << 16) | ($c << 8) | $d;

    if (DEBUGCSS) {
      /*DEBUGCSS*/ print "<pre>\n";
      /*DEBUGCSS*/ printf("_specificity(): 0x%08x \"%s\"\n", $specificity, $selector);
      /*DEBUGCSS*/ print "</pre>";
    }

    // "+" binds tighter than "|": the packed value has to be complete
    // before the (possibly negative) origin offset is added.
    return self::$_stylesheet_origins[$origin] + $specificity;
  }
  /**
   * Converts a CSS selector to an XPath query.
   *
   * The selector is scanned one byte at a time as a sequence of
   * "delimiter + token" pairs; every pair appends one step or predicate to
   * the query. Pseudo-classes that are not recognised add nothing.
   *
   * @param string $selector
   * @param bool   $first_pass when true, :before/:after are only collected
   *                           in "pseudo_elements"; when false they are
   *                           turned into a step matching the generated
   *                           child element
   *
   * @throws DOMPDF_Exception on a malformed attribute selector
   * @return array "query" => the XPath expression,
   *               "pseudo_elements" => names of the pseudo-elements found
   *               during a first pass, keyed by name
   */
  private function _css_selector_to_xpath($selector, $first_pass = false) {
    // Initial query (non-absolute)
    $query = "//";

    // Will contain :before and :after if they must be created
    $pseudo_elements = array();

    $delimiters = array(" ", ">", ".", "#", "+", ":", "[", "(");

    $first = isset($selector[0]) ? $selector[0] : "";

    // Add an implicit * at the beginning of the selector
    // if it begins with an attribute selector
    if ( $first === "[" ) {
      $selector = "*$selector";
      $first = "*";
    }

    // Add an implicit space at the beginning of the selector if there is no
    // delimiter there already.
    if ( !in_array($first, $delimiters, true) ) {
      $selector = " $selector";
    }

    // The selector is indexed by byte below, so its bound is a byte count
    $len = strlen($selector);
    $i = 0;

    while ( $i < $len ) {
      $s = $selector[$i];
      $i++;

      // Eat characters up to the next delimiter. An attribute selector is
      // read up to and including its closing "]", delimiters included.
      $tok = "";
      $in_attr = false;

      while ( $i < $len ) {
        $c = $selector[$i];

        if ( !$in_attr && in_array($c, $delimiters, true) ) {
          break;
        }

        if ( $selector[$i-1] === "[" ) {
          $in_attr = true;
        }

        $tok .= $c;
        $i++;

        if ( $in_attr && $c === "]" ) {
          break;
        }
      }

      switch ($s) {

        case " ":
        case ">":
          // Elements matching the next token that are descendants (" ") or
          // direct children (">") of the current step
          $axis = ($s === " ") ? "descendant" : "child";

          if ( substr($query, -1) !== "/" ) {
            $query .= "/";
          }

          // Tag names are case-insensitive
          $tok = strtolower($tok);

          if ( !$tok ) {
            $tok = "*";
          }

          $query .= "$axis::$tok";
          break;

        case ".":
        case "#":
          // All elements matching the current step with a class/id equal
          // to the token
          $attr = ($s === ".") ? "class" : "id";

          // empty class/id == *
          if ( substr($query, -1) === "/" ) {
            $query .= "*";
          }

          // libxml only supports XPath 1.0 (no matches()), so the value is
          // looked up as a space-delimited word.
          // Query improvement by Michael Sheakoski <michael@mjsdigital.com>:
          $query .= "[contains(concat(' ', @$attr, ' '), concat(' ', '$tok', ' '))]";
          break;

        case "+":
          // Sibling elements that follow the current step
          if ( substr($query, -1) !== "/" ) {
            $query .= "/";
          }

          // Same tag name normalisation as for " " and ">"
          $tok = strtolower($tok);

          if ( !$tok ) {
            $tok = "*";
          }

          $query .= "following-sibling::$tok";
          break;

        case ":":
          // A pseudo-class with no element step in front of it applies to
          // any element
          if ( substr($query, -1) === "/" ) {
            $query .= "*";
          }

          $last = false;

          // Pseudo-classes
          switch ($tok) {

            case "first-child":
              $query .= "[1]";
              break;

            case "last-child":
              $query .= "[not(following-sibling::*)]";
              break;

            case "first-of-type":
              $query .= "[position() = 1]";
              break;

            case "last-of-type":
              $query .= "[position() = last()]";
              break;

            // an+b, n, odd, and even
            case "nth-last-of-type":
            case "nth-last-child":
              $last = true;
              // no break: shares the argument handling below

            case "nth-of-type":
            case "nth-child":
              // $i sits on the "(" that ended the token. Read the argument
              // up to ")" and move past it, so that its characters ("+" in
              // "2n+1", spaces, ...) are not parsed again as combinators.
              $close = ($i < $len) ? strpos($selector, ")", $i) : false;

              if ( $close === false ) {
                $nth = ($i < $len) ? trim((string)substr($selector, $i + 1)) : "";
                $i = $len;
              }
              else {
                $nth = trim((string)substr($selector, $i + 1, $close - $i - 1));
                $i = $close + 1;
              }

              $position = $last ? "(last()-position()+1)" : "position()";

              // 1
              if ( preg_match("/^\d+$/", $nth) ) {
                $condition = "$position = $nth";
              }

              // odd
              elseif ( $nth === "odd" ) {
                $condition = "($position mod 2) = 1";
              }

              // even
              elseif ( $nth === "even" ) {
                $condition = "($position mod 2) = 0";
              }

              // an+b
              else {
                $condition = $this->_selector_an_plus_b($nth, $last);
              }

              $query .= "[$condition]";
              break;

            case "link":
              $query .= "[@href]";
              break;

            case "first-line": // TODO
            case "first-letter": // TODO
            // N/A
            case "active":
            case "hover":
            case "visited":
              $query .= "[false()]";
              break;

            /* Pseudo-elements */
            case "before":
            case "after":
              if ( $first_pass ) {
                $pseudo_elements[$tok] = $tok;
              }
              else {
                $query .= "/*[@$tok]";
              }
              break;

            case "empty":
              $query .= "[not(*) and not(normalize-space())]";
              break;

            case "disabled":
            case "checked":
              $query .= "[@$tok]";
              break;

            case "enabled":
              $query .= "[not(@disabled)]";
              break;
          }
          break;

        case "[":
          // Attribute selectors. All with an attribute matching the token
          $query .= $this->_attribute_selector_to_xpath($tok, $selector);
          break;
      }
    }

    // Trim the trailing '/' from the query
    if ( strlen($query) > 2 ) {
      $query = rtrim($query, "/");
    }

    return array("query" => $query, "pseudo_elements" => $pseudo_elements);
  }

  /**
   * Translate the inside of a CSS attribute selector into an XPath predicate.
   *
   * @param string $tok      the text following "[", normally ending in "]"
   * @param string $selector the complete selector (used in error messages)
   *
   * @throws DOMPDF_Exception when the operator is malformed or the
   *                          attribute name is missing
   * @return string the predicate, including its square brackets
   */
  private function _attribute_selector_to_xpath($tok, $selector) {
    $attr_delimiters = array("=", "]", "~", "|", "$", "^", "*");
    $tok_len = strlen($tok);
    $j = 0;

    // Attribute name: everything up to the operator or the closing "]"
    $attr = "";
    while ( $j < $tok_len && !in_array($tok[$j], $attr_delimiters, true) ) {
      $attr .= $tok[$j++];
    }

    // Operator: "=" or one of ~ | $ ^ * followed by "="
    $op = "";
    $op_char = ($j < $tok_len) ? $tok[$j] : "";

    switch ( $op_char ) {
      case "~":
      case "|":
      case "$":
      case "^":
      case "*":
        $j++;
        if ( $j >= $tok_len || $tok[$j] !== "=" ) {
          throw new DOMPDF_Exception("Invalid CSS selector syntax: invalid attribute selector: $selector");
        }
        $op = $op_char . "=";
        break;

      case "=":
        $op = "=";
        break;
    }

    // Read the attribute value, if required ($j is on the "=" here)
    $value = "";
    if ( $op !== "" ) {
      $j++;
      while ( $j < $tok_len && $tok[$j] !== "]" ) {
        $value .= $tok[$j++];
      }
    }

    if ( $attr === "" ) {
      throw new DOMPDF_Exception("Invalid CSS selector syntax: missing attribute name");
    }

    $value = trim($value, "\"'");

    switch ( $op ) {
      case "=":
        return "[@$attr=\"$value\"]";

      case "~=":
        // FIXME: this will break if $value contains quoted strings
        // (e.g. [type~="a b c" "d e f"])
        $conditions = array();
        foreach ( explode(" ", $value) as $val ) {
          $conditions[] = "@$attr=\"$val\"";
        }
        return "[" . implode(" or ", $conditions) . "]";

      case "|=":
        $conditions = array();
        foreach ( explode("-", $value) as $val ) {
          $conditions[] = "starts-with(@$attr, \"$val\")";
        }
        return "[" . implode(" or ", $conditions) . "]";

      case "$=":
        // XPath 1.0 has no ends-with(): compare the trailing substring
        return "[substring(@$attr, string-length(@$attr)-" . (strlen($value) - 1) . ")=\"$value\"]";

      case "^=":
        return "[starts-with(@$attr,\"$value\")]";

      case "*=":
        return "[contains(@$attr,\"$value\")]";
    }

    // No operator: the attribute only has to be present
    return "[@$attr]";
  }
  /**
   * Build the XPath condition for an "an+b" argument of an :nth-*() selector.
   *
   * Accepted forms are "n", "-n", "+n", "3n", "2n+1", "-n+3", "2n-1", ...
   * (whitespace is ignored). "odd", "even" and plain integers are handled
   * by the caller. Anything that is not a valid an+b expression, or that
   * can never match a position (which starts at 1), yields "false()".
   *
   * Adapted from
   * https://github.com/tenderlove/nokogiri/blob/master/lib/nokogiri/css/xpath_visitor.rb
   *
   * @param string $expr the an+b expression
   * @param bool   $last count positions from the end instead of the start
   *
   * @return string an XPath boolean expression
   */
  protected function _selector_an_plus_b($expr, $last = false) {
    $expr = preg_replace("/\s/", "", $expr);
    if ( !preg_match("/^(?P<a>[-+]?[0-9]*)?n(?P<b>[-+]?[0-9]+)?$/", $expr, $matches) ) {
      return "false()";
    }

    // A bare sign in front of "n" stands for a step of +1 / -1
    $a_raw = isset($matches["a"]) ? $matches["a"] : "";
    if ( $a_raw === "" || $a_raw === "+" ) {
      $a = 1;
    }
    elseif ( $a_raw === "-" ) {
      $a = -1;
    }
    else {
      $a = intval($a_raw);
    }

    $b = (isset($matches["b"]) && $matches["b"] !== "") ? intval($matches["b"]) : 0;

    $position = ($last ? "(last()-position()+1)" : "position()");

    // 0n+b selects the single position b
    if ( $a === 0 ) {
      return ($b > 0) ? "$position = $b" : "false()";
    }

    if ( $b === 0 ) {
      // A negative step without offset only yields positions <= 0
      return ($a > 0) ? "($position mod $a) = 0" : "false()";
    }

    $compare = (($a < 0) ? "<=" : ">=");
    $b2 = -$b;
    if ( $b2 >= 0 ) {
      $b2 = "+$b2";
    }
    return "($position $compare $b) and ((($position $b2) mod ".abs($a).") = 0)";
  }
  /**
   * Applies all current styles to a particular document tree.
   *
   * apply_styles() applies all currently loaded styles to the provided
   * {@link Frame_Tree}. Aside from parsing CSS, this is the main purpose
   * of this class. It works in three steps:
   *  1. insert generated nodes for selectors using :before / :after,
   *  2. collect, per frame id, every style whose selector matches,
   *     grouped by specificity,
   *  3. walk the frames, merge the collected styles in ascending
   *     specificity, inherit from the nearest element parent and assign
   *     the result to the frame.
   * The style registry is emptied afterwards.
   *
   * @param Frame_Tree $tree
   */
  function apply_styles(Frame_Tree $tree) {
    // Use XPath to select nodes. This would be easier if we could attach
    // Frame objects directly to DOMNodes using the setUserData() method, but
    // we can't do that just yet. Instead, we set a _node attribute_ in
    // Frame->set_id() and use that as a handle on the Frame object via
    // Frame_Tree::$_registry.

    // We create a scratch array of styles indexed by frame id. Once all
    // styles have been assigned, we order the cached styles by specificity
    // and create a final style object to assign to the frame.

    // FIXME: this is not particularly robust...

    $styles = array();
    $xp = new DOMXPath($tree->get_dom());

    // Add generated content
    foreach ($this->_styles as $selector => $style) {
      if ( strpos($selector, ":before") === false && strpos($selector, ":after") === false ) {
        continue;
      }

      $query = $this->_css_selector_to_xpath($selector, true);

      // Retrieve the nodes (query() yields false for an invalid expression)
      $nodes = @$xp->query($query["query"]);
      if ( $nodes == null ) {
        record_warnings(E_USER_WARNING, "The CSS selector '$selector' is not valid", __FILE__, __LINE__);
        continue;
      }

      foreach ($nodes as $node) {
        foreach ($query["pseudo_elements"] as $pos) {
          // Do not add a new pseudo element if another one already matched
          if ( $node->hasAttribute("dompdf_{$pos}_frame_id") ) {
            continue;
          }

          if (($src = $this->_image($style->content)) !== "none") {
            $new_node = $node->ownerDocument->createElement("img_generated");
            $new_node->setAttribute("src", $src);
          }
          else {
            $new_node = $node->ownerDocument->createElement("dompdf_generated");
          }

          $new_node->setAttribute($pos, $pos);

          $new_frame_id = $tree->insert_node($node, $new_node, $pos);
          $node->setAttribute("dompdf_{$pos}_frame_id", $new_frame_id);
        }
      }
    }

    // Apply all styles in stylesheet
    foreach ($this->_styles as $selector => $style) {
      $query = $this->_css_selector_to_xpath($selector);

      // Retrieve the nodes
      $nodes = @$xp->query($query["query"]);
      if ( $nodes == null ) {
        record_warnings(E_USER_WARNING, "The CSS selector '$selector' is not valid", __FILE__, __LINE__);
        continue;
      }

      // The specificity depends on the selector only: compute it once, and
      // only if at least one element matched
      $spec = null;

      foreach ($nodes as $node) {
        // Retrieve the node id
        // Only DOMElements get styles
        if ( $node->nodeType != XML_ELEMENT_NODE ) {
          continue;
        }

        $id = $node->getAttribute("frame_id");

        if ( $spec === null ) {
          $spec = $this->_specificity($selector);
        }

        // Assign the current style to the scratch array
        $styles[$id][$spec][] = $style;
      }
    }

    // Now create the styles and assign them to the appropriate frames. (We
    // iterate over the tree using an implicit Frame_Tree iterator.)
    $root_flg = false;
    foreach ($tree->get_frames() as $frame) {
      // The first frame receives the @page base style, when there is one
      if ( !$root_flg && $this->_page_styles["base"] ) {
        $style = $this->_page_styles["base"];
        $root_flg = true;
      }
      else {
        $style = $this->create_style();
      }

      // Find nearest DOMElement parent
      $p = $frame;
      while ( $p = $p->get_parent() ) {
        if ( $p->get_node()->nodeType == XML_ELEMENT_NODE ) {
          break;
        }
      }

      // Styles can only be applied directly to DOMElements; anonymous
      // frames inherit from their parent
      if ( $frame->get_node()->nodeType != XML_ELEMENT_NODE ) {
        if ( $p ) {
          $style->inherit($p->get_style());
        }

        $frame->set_style($style);
        continue;
      }

      $id = $frame->get_id();

      // Defined up front: the closing DEBUGCSS output below needs it even
      // when no style matched this frame
      $debug_nodename = $frame->get_node()->nodeName;

      // Handle HTML 4.0 attributes
      Attribute_Translator::translate_attributes($frame);
      if ( ($str = $frame->get_node()->getAttribute(Attribute_Translator::$_style_attr)) !== "" ) {
        // Lowest specificity
        $styles[$id][1][] = $this->_parse_properties($str);
      }

      // Locate any additional style attributes
      if ( ($str = $frame->get_node()->getAttribute("style")) !== "" ) {
        // Destroy CSS comments
        $str = preg_replace("'/\*.*?\*/'si", "", $str);

        $spec = $this->_specificity("!attr");
        $styles[$id][$spec][] = $this->_parse_properties($str);
      }

      // Grab the applicable styles
      if ( isset($styles[$id]) ) {
        $applied_styles = $styles[$id];

        // Sort by specificity
        ksort($applied_styles);

        if (DEBUGCSS) {
          print "<pre>\n[$debug_nodename\n";
          foreach ($applied_styles as $spec => $arr) {
            printf("specificity: 0x%08x\n",$spec);
            foreach ($arr as $s) {
              print "[\n";
              $s->debug_print();
              print "]\n";
            }
          }
        }

        // Merge the new styles with the inherited styles
        foreach ($applied_styles as $arr) {
          foreach ($arr as $s) {
            $style->merge($s);
          }
        }
      }

      // Inherit parent's styles if required
      if ( $p ) {
        if (DEBUGCSS) {
          print "inherit:\n";
          print "[\n";
          $p->get_style()->debug_print();
          print "]\n";
        }

        $style->inherit( $p->get_style() );
      }

      if (DEBUGCSS) {
        print "DomElementStyle:\n";
        print "[\n";
        $style->debug_print();
        print "]\n";
        print "/$debug_nodename]\n</pre>";
      }

      $frame->set_style($style);
    }

    // We're done! Clean out the registry of all styles since we
    // won't be needing this later.
    foreach ( array_keys($this->_styles) as $key ) {
      $this->_styles[$key] = null;
      unset($this->_styles[$key]);
    }
  }
  /**
   * Parse a CSS string using a regex parser.
   * Called by {@link Stylesheet::parse_css()}
   *
   * Comments and a wrapping HTML comment are stripped first. The text is then
   * split into @rules (@import, @media, @page, @font-face) and regular
   * rulesets, each of which is handed to the matching _parse_*() helper.
   * Unknown @rules are ignored.
   *
   * @param string $str the raw CSS text
   *
   * @throws DOMPDF_Exception if preg_match_all() reports a failure
   */
  private function _parse_css($str) {

    $str = trim($str);

    // Destroy comments and remove HTML comments
    $css = preg_replace(array(
      "'/\*.*?\*/'si",
      "/^<!--/",
      "/-->$/"
    ), "", $str);

    // FIXME: handle '{' within strings, e.g. [attr="string {}"]

    // Something more legible:
    $re =
      "/\s* # Skip leading whitespace \n".
      "( @([^\s]+)\s+([^{;]*) (?:;|({)) )? # Match @rules followed by ';' or '{' \n".
      "(?(1) # Only parse sub-sections if we're in an @rule... \n".
      " (?(4) # ...and if there was a leading '{' \n".
      " \s*( (?:(?>[^{}]+) ({)? # Parse rulesets and individual @page rules \n".
      " (?(6) (?>[^}]*) }) \s*)+? \n".
      " ) \n".
      " }) # Balancing '}' \n".
      "| # Branch to match regular rules (not preceeded by '@')\n".
      "([^{]*{[^}]*})) # Parse normal rulesets\n".
      "/xs";

    if ( preg_match_all($re, $css, $matches, PREG_SET_ORDER) === false ) {
      // An error occurred
      throw new DOMPDF_Exception("Error parsing css file: preg_match_all() failed.");
    }

    // After matching, the array indices are set as follows:
    //
    // [0] => complete text of match
    // [1] => contains '@import ...;' or '@media {' if applicable
    // [2] => text following @ for cases where [1] is set
    // [3] => media types or full text following '@import ...;'
    // [4] => '{', if present
    // [5] => rulesets within media rules
    // [6] => '{', within media rules
    // [7] => individual rules, outside of media rules
    //
    // With PREG_SET_ORDER, trailing groups that did not take part in the match
    // are left out of the array, so [5] and [7] are read through isset().

    foreach ( $matches as $match ) {
      $match[2] = trim($match[2]);

      if ( $match[2] !== "" ) {
        // Body of the @rule; absent for the ';' terminated form (e.g. "@page;")
        $body = isset($match[5]) ? $match[5] : "";

        // Handle @rules
        switch ($match[2]) {

          case "import":
            $this->_parse_import($match[3]);
            break;

          case "media":
            $acceptedmedia = self::$ACCEPTED_GENERIC_MEDIA_TYPES;
            $acceptedmedia[] = $this->_dompdf->get_option("default_media_type");

            $media = preg_split("/\s*,\s*/", mb_strtolower(trim($match[3])));

            if ( count(array_intersect($acceptedmedia, $media)) ) {
              $this->_parse_sections($body);
            }
            break;

          case "page":
            // Handles @page for page oriented media.
            // Note: this is a reduced syntax, e.g.
            //   @page { margin:1cm; color:blue; }
            // i.e. only the properties of a single style (not a sequence of
            // rulesets), applied to the very first "root" frame before the
            // other styles of the frame are processed.
            // Working properties:
            //   margin      (margin around the edge of the paper)
            //   font-family (default font of pages)
            //   color       (default text color of pages)
            // Non working properties (reason unknown):
            //   border, padding, background-color
            // Other properties (further font or border attributes) are untested.
            // If a border or background color around each sheet is desired,
            // assign it to the <body> tag, possibly only in the css of the
            // correct media type.

            $page_selector = trim($match[3]);

            $key = null;
            switch ($page_selector) {
              case "":
                $key = "base";
                break;

              case ":left":
              case ":right":
              case ":odd":
              case ":even":
              case ":first":
                $key = $page_selector;
                break;

              default:
                // If the page has a name, skip the style. A bare "continue"
                // inside a switch only acts like "break", which would let the
                // style be stored under a null key; leave both switches so the
                // "continue" after the outer switch moves on to the next match.
                break 2;
            }

            // Store the style for later...
            $page_style = $this->_parse_properties($body);
            if ( empty($this->_page_styles[$key]) ) {
              $this->_page_styles[$key] = $page_style;
            }
            else {
              $this->_page_styles[$key]->merge($page_style);
            }
            break;

          case "font-face":
            $this->_parse_font_face($body);
            break;

          default:
            // ignore everything else
            break;
        }

        continue;
      }

      if ( isset($match[7]) && $match[7] !== "" ) {
        $this->_parse_sections($match[7]);
      }

    }
  }
  /**
   * Resolve a CSS url(...) value in the context of the current stylesheet.
   *
   * See also style.cls Style::_image() (candidate for refactoring); this
   * variant also works for imported css files.
   *
   * A value without the text "url" yields "none". For a value with no
   * protocol, while the stylesheet itself has no protocol either, the target
   * is treated as a local file: a path starting with '/' or '\' is taken
   * relative to $_SERVER["DOCUMENT_ROOT"], anything else relative to the
   * stylesheet's base path, and the result is run through realpath(). In all
   * other cases the url is assembled with build_url().
   *
   * @param string $val the raw CSS value, e.g. url("img/bg.png")
   * @return string the resolved path or url, or "none"
   */
  protected function _image($val) {
    $debug = DEBUGCSS;
    $url_parts = "none";

    if ( mb_strpos($val, "url") !== false ) {
      // Strip the url( ) wrapper and optional quotes
      $val = preg_replace("/url\(['\"]?([^'\")]+)['\"]?\)/","\\1", trim($val));

      // Resolve the url now in the context of the current stylesheet
      $url_parts = explode_url($val);

      if ( $url_parts["protocol"] == "" && $this->get_protocol() == "" ) {
        $root = ( $url_parts["path"][0] === '/' || $url_parts["path"][0] === '\\' )
          ? $_SERVER["DOCUMENT_ROOT"].'/'
          : $this->get_base_path();

        $path = realpath($root . $url_parts["path"] . $url_parts["file"]);

        // If realpath returns FALSE then specifically state that there is no background image
        // FIXME: Is this causing problems for imported CSS files? There are some './none' references when running the test cases.
        if ( !$path ) {
          $path = 'none';
        }
      }
      else {
        $path = build_url(
          $this->get_protocol(),
          $this->get_host(),
          $this->get_base_path(),
          $val
        );
      }
    }
    else {
      // Don't resolve "no image": prefixing a path would make it
      // unrecognisable as "none" later on
      $path = "none";
    }

    if ( $debug ) {
      print "<pre>[_image\n";
      print_r($url_parts);
      print $this->get_protocol()."\n".$this->get_base_path()."\n".$path."\n";
      print "_image]</pre>";
    }

    return $path;
  }
  /**
   * Parse an @import rule and load the referenced stylesheet.
   *
   * The rule text is split on whitespace and commas: the first token is the
   * import target, any further tokens are media types. The file is loaded
   * when no media type is given or when at least one of them is accepted.
   * Both target forms allowed by CSS are handled: url(...) and a plain quoted
   * string ("file.css"). An empty target is ignored.
   *
   * The current base url properties are saved before loading and restored
   * afterwards.
   *
   * @param string $url the text following "@import" (target plus optional media types)
   */
  private function _parse_import($url) {
    $arr = preg_split("/[\s\n,]/", $url, -1, PREG_SPLIT_NO_EMPTY);
    $url = array_shift($arr);

    // "@import ;" - nothing to import
    if ( $url === null ) {
      return;
    }

    // _image() only resolves values containing a lower-case "url(...)", so
    // bring both accepted forms into that shape.
    if ( preg_match("/^url\(/i", $url) ) {
      // CSS function names are case-insensitive
      $url = "url(" . substr($url, 4);
    }
    else {
      // String form: @import "file.css";
      $url = trim($url, "\"'");
      if ( $url === "" ) {
        return;
      }
      $url = "url(" . $url . ")";
    }

    $accept = false;

    if ( count($arr) > 0 ) {

      $acceptedmedia = self::$ACCEPTED_GENERIC_MEDIA_TYPES;
      $acceptedmedia[] = $this->_dompdf->get_option("default_media_type");

      // @import url media_type [media_type...]
      foreach ( $arr as $type ) {
        if ( in_array(mb_strtolower(trim($type)), $acceptedmedia) ) {
          $accept = true;
          break;
        }
      }

    }
    else {
      // unconditional import
      $accept = true;
    }

    if ( $accept ) {
      // Store our current base url properties in case the new url is elsewhere
      $protocol = $this->_protocol;
      $host = $this->_base_host;
      $path = $this->_base_path;

      // Todo: do we need to replace a php or file protocol with an empty
      // protocol for local files?
      $url = $this->_image($url);

      $this->load_css_file($url);

      // Restore the current base url
      $this->_protocol = $protocol;
      $this->_base_host = $host;
      $this->_base_path = $path;
    }

  }
  /**
   * Parse an @font-face{} section and register the font it describes.
   * http://www.w3.org/TR/css3-fonts/#the-font-face-rule
   *
   * Every url(...) / local(...) entry of the "src" descriptor is inspected.
   * An entry is usable when it is not local() and its format() hint is absent
   * or one of woff, opentype, truetype. The first usable entry is registered
   * through Font_Metrics::register_font(); without any usable entry nothing
   * is registered.
   *
   * @param string $str CSS @font-face rules
   * @return void
   */
  private function _parse_font_face($str) {
    $descriptors = $this->_parse_properties($str);

    preg_match_all("/(url|local)\s*\([\"\']?([^\"\'\)]+)[\"\']?\)\s*(format\s*\([\"\']?([^\"\'\)]+)[\"\']?\))?/i", $descriptors->src, $src);

    $supported_formats = array("", "woff", "opentype", "truetype");
    $usable_paths = array();

    foreach ( array_keys($src[0]) as $i ) {
      $is_local = strtolower($src[1][$i]) === "local";
      $format = $src[4][$i];
      $path = build_url($this->_protocol, $this->_base_host, $this->_base_path, $src[2][$i]);

      if ( !$is_local && in_array($format, $supported_formats) ) {
        $usable_paths[] = $path;
      }
    }

    // No valid sources
    if ( count($usable_paths) === 0 ) {
      return;
    }

    $font = array(
      "family" => $descriptors->get_font_family_raw(),
      "weight" => $descriptors->font_weight,
      "style" => $descriptors->font_style,
    );

    Font_Metrics::register_font($font, $usable_paths[0]);
  }
  /**
   * Parse regular CSS blocks.
   *
   * _parse_properties() creates a new Style object based on the provided
   * CSS rules. The text is split into declarations on ';' (the split pattern
   * skips a ';' that sits inside parentheses), each declaration is split at
   * its first ':' into a lower-cased property name and a value, and the value
   * is assigned to the Style. Empty declarations, declarations without ':'
   * and declarations without a property name are skipped.
   *
   * @param string $str CSS rules
   * @return Style
   */
  private function _parse_properties($str) {
    $properties = preg_split("/;(?=(?:[^\(]*\([^\)]*\))*(?![^\)]*\)))/", $str);

    if (DEBUGCSS) print '[_parse_properties';

    // Create the style
    $style = new Style($this);

    foreach ($properties as $prop) {
      // A css property can have " ! important" appended (whitespace optional).
      // Strip this off to decode the core of the property correctly and pass
      // the flag on to the style: !important properties can only be
      // overridden by other !important ones.
      // Plain string functions are used instead of a regex to keep the
      // typical case (no modifier) fast.
      if (DEBUGCSS) print '(';

      $important = false;
      $prop = trim($prop);

      // CSS keywords are ASCII case-insensitive, so "!IMPORTANT" counts too
      if ( strtolower(substr($prop, -9)) === 'important' ) {
        $prop_tmp = rtrim(substr($prop, 0, -9));

        if ( substr($prop_tmp, -1) === '!' ) {
          $prop = rtrim(substr($prop_tmp, 0, -1));
          $important = true;
        }
      }

      if ( $prop === "" ) {
        if (DEBUGCSS) print 'empty)';
        continue;
      }

      $i = mb_strpos($prop, ":");
      if ( $i === false ) {
        if (DEBUGCSS) print 'novalue'.$prop.')';
        continue;
      }

      $prop_name = rtrim(mb_strtolower(mb_substr($prop, 0, $i)));
      $value = ltrim(mb_substr($prop, $i+1));

      // Malformed declaration such as ": red" - an empty string cannot be
      // used as a property name on the style object
      if ( $prop_name === "" ) {
        if (DEBUGCSS) print 'noname'.$prop.')';
        continue;
      }

      if (DEBUGCSS) print $prop_name.':='.$value.($important?'!IMPORTANT':'').')';

      // The assignment below may be handled by overloading through __set, and
      // the overloaded functions might check the important flags; therefore
      // set the flag first.
      if ($important) {
        $style->important_set($prop_name);
      }

      $style->$prop_name = $value;
    }

    if (DEBUGCSS) print '_parse_properties]';

    return $style;
  }
  /**
   * Parse selector + rulesets.
   *
   * The text is split on '}' into "selectors { declarations" chunks. The
   * declarations of a chunk are parsed once into a Style, which is then
   * added for every comma separated selector of that chunk. Chunks without
   * a '{' (such as the remainder after the last '}') carry no ruleset and
   * are skipped.
   *
   * @param string $str CSS selectors and rulesets
   */
  private function _parse_sections($str) {
    // Pre-process: collapse all whitespace and strip whitespace around '>',
    // '.', ':', '+', '#'
    $patterns = array("/[\\s\n]+/", "/\\s+([>.:+#])\\s+/");
    $replacements = array(" ", "\\1");
    $str = preg_replace($patterns, $replacements, $str);

    $sections = explode("}", $str);

    if (DEBUGCSS) print '[_parse_sections';

    foreach ($sections as $sect) {
      $i = mb_strpos($sect, "{");

      // No '{' means there is no selector / declaration pair in this chunk.
      // Using the boolean false as an offset below would only yield an empty
      // selector and a throwaway Style.
      if ( $i === false ) {
        continue;
      }

      $selectors = explode(",", mb_substr($sect, 0, $i));

      if (DEBUGCSS) print '[section';

      $style = $this->_parse_properties(trim(mb_substr($sect, $i+1)));

      // Assign it to the selected elements
      foreach ($selectors as $selector) {
        $selector = trim($selector);

        if ($selector == "") {
          if (DEBUGCSS) print '#empty#';
          continue;
        }

        if (DEBUGCSS) print '#'.$selector.'#';

        $this->add_style($selector, $style);
      }

      if (DEBUGCSS) print 'section]';
    }

    if (DEBUGCSS) print '_parse_sections]';
  }
  /**
   * Dumps the entire stylesheet as a string.
   *
   * Produces one "selector => style" line per entry of the style registry,
   * each terminated by a newline. Useful for debugging.
   *
   * @return string
   */
  function __toString() {
    $lines = array();

    foreach ($this->_styles as $selector => $style) {
      $lines[] = $selector . " => " . $style->__toString() . "\n";
    }

    return implode("", $lines);
  }
  1167. }