fpdi_pdf_parser.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. <?php
  2. //
  3. // FPDI - Version 1.2
  4. //
  5. // Copyright 2004-2007 Setasign - Jan Slabon
  6. //
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. //
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. //
  13. // Unless required by applicable law or agreed to in writing, software
  14. // distributed under the License is distributed on an "AS IS" BASIS,
  15. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. // See the License for the specific language governing permissions and
  17. // limitations under the License.
  18. //
  19. class fpdi_pdf_parser extends pdf_parser {
  20. /**
  21. * Pages
  22. * Index beginns at 0
  23. *
  24. * @var array
  25. */
  26. var $pages;
  27. /**
  28. * Page count
  29. * @var integer
  30. */
  31. var $page_count;
  32. /**
  33. * actual page number
  34. * @var integer
  35. */
  36. var $pageno;
  37. /**
  38. * FPDI Reference
  39. * @var object
  40. */
  41. var $fpdi;
  42. /**
  43. * Available BoxTypes
  44. *
  45. * @var array
  46. */
  47. var $availableBoxes = array("/MediaBox","/CropBox","/BleedBox","/TrimBox","/ArtBox");
  48. /**
  49. * Constructor
  50. *
  51. * @param string $filename Source-Filename
  52. * @param object $fpdi Object of type fpdi
  53. */
  54. function fpdi_pdf_parser($filename,&$fpdi) {
  55. $this->fpdi =& $fpdi;
  56. $this->filename = $filename;
  57. parent::pdf_parser($filename);
  58. if ($this->success == false) { return false; }
  59. // resolve Pages-Dictonary
  60. $pages = $this->pdf_resolve_object($this->c, $this->root[1][1]['/Pages']);
  61. if ($this->success == false) { return false; }
  62. // Read pages
  63. $this->read_pages($this->c, $pages, $this->pages);
  64. if ($this->success == false) { return false; }
  65. // count pages;
  66. $this->page_count = count($this->pages);
  67. }
  68. /**
  69. * Get pagecount from sourcefile
  70. *
  71. * @return int
  72. */
  73. function getPageCount() {
  74. return $this->page_count;
  75. }
  76. /**
  77. * Set pageno
  78. *
  79. * @param int $pageno Pagenumber to use
  80. */
  81. function setPageno($pageno) {
  82. $pageno = ((int) $pageno) - 1;
  83. if ($pageno < 0 || $pageno >= $this->getPageCount()) {
  84. $this->fpdi->error("Pagenumber is wrong!");
  85. }
  86. $this->pageno = $pageno;
  87. }
  88. /**
  89. * Get page-resources from current page
  90. *
  91. * @return array
  92. */
  93. function getPageResources() {
  94. return $this->_getPageResources($this->pages[$this->pageno]);
  95. }
  96. /**
  97. * Get page-resources from /Page
  98. *
  99. * @param array $obj Array of pdf-data
  100. */
  101. function _getPageResources ($obj) { // $obj = /Page
  102. $obj = $this->pdf_resolve_object($this->c, $obj);
  103. // If the current object has a resources
  104. // dictionary associated with it, we use
  105. // it. Otherwise, we move back to its
  106. // parent object.
  107. if (isset ($obj[1][1]['/Resources'])) {
  108. $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Resources']);
  109. if ($res[0] == PDF_TYPE_OBJECT)
  110. return $res[1];
  111. return $res;
  112. } else {
  113. if (!isset ($obj[1][1]['/Parent'])) {
  114. return false;
  115. } else {
  116. $res = $this->_getPageResources($obj[1][1]['/Parent']);
  117. if ($res[0] == PDF_TYPE_OBJECT)
  118. return $res[1];
  119. return $res;
  120. }
  121. }
  122. }
  123. /**
  124. * Get content of current page
  125. *
  126. * If more /Contents is an array, the streams are concated
  127. *
  128. * @return string
  129. */
  130. function getContent() {
  131. $buffer = "";
  132. if (isset($this->pages[$this->pageno][1][1]['/Contents'])) {
  133. $contents = $this->_getPageContent($this->pages[$this->pageno][1][1]['/Contents']);
  134. foreach($contents AS $tmp_content) {
  135. $buffer .= $this->_rebuildContentStream($tmp_content).' ';
  136. }
  137. }
  138. return $buffer;
  139. }
  140. /**
  141. * Resolve all content-objects
  142. *
  143. * @param array $content_ref
  144. * @return array
  145. */
  146. function _getPageContent($content_ref) {
  147. $contents = array();
  148. if ($content_ref[0] == PDF_TYPE_OBJREF) {
  149. $content = $this->pdf_resolve_object($this->c, $content_ref);
  150. if ($content[1][0] == PDF_TYPE_ARRAY) {
  151. $contents = $this->_getPageContent($content[1]);
  152. } else {
  153. $contents[] = $content;
  154. }
  155. } else if ($content_ref[0] == PDF_TYPE_ARRAY) {
  156. foreach ($content_ref[1] AS $tmp_content_ref) {
  157. $contents = array_merge($contents,$this->_getPageContent($tmp_content_ref));
  158. }
  159. }
  160. return $contents;
  161. }
  162. /**
  163. * Rebuild content-streams
  164. *
  165. * @param array $obj
  166. * @return string
  167. */
  168. function _rebuildContentStream($obj) {
  169. $filters = array();
  170. if (isset($obj[1][1]['/Filter'])) {
  171. $_filter = $obj[1][1]['/Filter'];
  172. if ($_filter[0] == PDF_TYPE_TOKEN) {
  173. $filters[] = $_filter;
  174. } else if ($_filter[0] == PDF_TYPE_ARRAY) {
  175. $filters = $_filter[1];
  176. }
  177. }
  178. $stream = $obj[2][1];
  179. foreach ($filters AS $_filter) {
  180. switch ($_filter[1]) {
  181. case "/FlateDecode":
  182. if (function_exists('gzuncompress')) {
  183. $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : '';
  184. } else {
  185. $this->fpdi->error(sprintf("To handle %s filter, please compile php with zlib support.",$_filter[1]));
  186. }
  187. if ($stream === false) {
  188. $this->fpdi->error("Error while decompressing stream.");
  189. }
  190. break;
  191. // mPDF 4.2.003
  192. case '/LZWDecode':
  193. include_once(_MPDF_PATH.'mpdfi/filters/FilterLZW.php');
  194. // mPDF 5.0 Removed pass by reference =&
  195. $decoder = new FilterLZW();
  196. $stream = $decoder->decode($stream);
  197. break;
  198. case '/ASCII85Decode':
  199. include_once(_MPDF_PATH.'mpdfi/filters/FilterASCII85.php');
  200. // mPDF 5.0 Removed pass by reference =&
  201. $decoder = new FilterASCII85();
  202. $stream = $decoder->decode($stream);
  203. break;
  204. case null:
  205. $stream = $stream;
  206. break;
  207. default:
  208. $this->fpdi->error(sprintf("Unsupported Filter: %s",$_filter[1]));
  209. }
  210. }
  211. return $stream;
  212. }
  213. /**
  214. * Get a Box from a page
  215. * Arrayformat is same as used by fpdf_tpl
  216. *
  217. * @param array $page a /Page
  218. * @param string $box_index Type of Box @see $availableBoxes
  219. * @return array
  220. */
  221. function getPageBox($page, $box_index) {
  222. $page = $this->pdf_resolve_object($this->c,$page);
  223. $box = null;
  224. if (isset($page[1][1][$box_index]))
  225. $box =& $page[1][1][$box_index];
  226. if (!is_null($box) && $box[0] == PDF_TYPE_OBJREF) {
  227. $tmp_box = $this->pdf_resolve_object($this->c,$box);
  228. $box = $tmp_box[1];
  229. }
  230. if (!is_null($box) && $box[0] == PDF_TYPE_ARRAY) {
  231. $b =& $box[1];
  232. return array("x" => $b[0][1]/_MPDFK,
  233. "y" => $b[1][1]/_MPDFK,
  234. "w" => abs($b[0][1]-$b[2][1])/_MPDFK,
  235. "h" => abs($b[1][1]-$b[3][1])/_MPDFK); // mPDF 5.3.90
  236. } else if (!isset ($page[1][1]['/Parent'])) {
  237. return false;
  238. } else {
  239. return $this->getPageBox($this->pdf_resolve_object($this->c, $page[1][1]['/Parent']), $box_index);
  240. }
  241. }
  242. function getPageBoxes($pageno) {
  243. return $this->_getPageBoxes($this->pages[$pageno-1]);
  244. }
  245. /**
  246. * Get all Boxes from /Page
  247. *
  248. * @param array a /Page
  249. * @return array
  250. */
  251. function _getPageBoxes($page) {
  252. $boxes = array();
  253. foreach($this->availableBoxes AS $box) {
  254. if ($_box = $this->getPageBox($page,$box)) {
  255. $boxes[$box] = $_box;
  256. }
  257. }
  258. return $boxes;
  259. }
  260. function getPageRotation($pageno) {
  261. return $this->_getPageRotation($this->pages[$pageno-1]);
  262. }
  263. function _getPageRotation ($obj) { // $obj = /Page
  264. $obj = $this->pdf_resolve_object($this->c, $obj);
  265. if (isset ($obj[1][1]['/Rotate'])) {
  266. $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Rotate']);
  267. if ($res[0] == PDF_TYPE_OBJECT)
  268. return $res[1];
  269. return $res;
  270. } else {
  271. if (!isset ($obj[1][1]['/Parent'])) {
  272. return false;
  273. } else {
  274. $res = $this->_getPageRotation($obj[1][1]['/Parent']);
  275. if ($res[0] == PDF_TYPE_OBJECT)
  276. return $res[1];
  277. return $res;
  278. }
  279. }
  280. }
  281. /**
  282. * Read all /Page(es)
  283. *
  284. * @param object pdf_context
  285. * @param array /Pages
  286. * @param array the result-array
  287. */
  288. function read_pages (&$c, &$pages, &$result) {
  289. // Get the kids dictionary
  290. $kids = $this->pdf_resolve_object ($c, $pages[1][1]['/Kids']);
  291. if (!is_array($kids)) {
  292. // mPDF 4.0
  293. $this->success = false;
  294. $this->errormsg = sprintf("Cannot find /Kids in current /Page-Dictionary");
  295. return false;
  296. }
  297. foreach ($kids[1] as $v) {
  298. $pg = $this->pdf_resolve_object ($c, $v);
  299. if ($pg[1][1]['/Type'][1] === '/Pages') {
  300. // If one of the kids is an embedded
  301. // /Pages array, resolve it as well.
  302. $this->read_pages ($c, $pg, $result);
  303. } else {
  304. $result[] = $pg;
  305. }
  306. }
  307. }
  308. }
  309. ?>