123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503 |
- <?php
- require_once(dirname(__FILE__).'/tcpdf_filters.php');
/**
 * Minimal PDF document parser.
 *
 * The constructor receives the raw bytes of a PDF document, reads the
 * cross-reference table(s) and trailer, and then parses every indirect object
 * listed there. The result is available through getParsedData().
 *
 * Every parsed element is represented as an array:
 *   [0] => type token ('/', '(', '<', '[', '<<', 'numeric', 'boolean', 'null',
 *          'objref', 'obj', 'stream', 'endstream', 'endobj', ...),
 *   [1] => value (string, or array of elements for '[' and '<<'),
 *   [2] => offset in the document just after the element,
 *   [3] => (streams only, when decoding is requested) result of decodeStream().
 */
class TCPDF_PARSER {

	/**
	 * Raw content of the PDF document; emptied once parsing is complete.
	 */
	private $pdfdata = '';

	/**
	 * Cross-reference data: ['xref'] maps "objnum_gennum" keys to byte
	 * offsets, ['trailer'] holds the fields read from the trailer.
	 */
	protected $xref = array();

	/**
	 * Parsed indirect objects, keyed by "objnum_gennum".
	 */
	protected $objects = array();

	/**
	 * TCPDF_FILTERS instance used to decode stream filters.
	 */
	private $FilterDecoders;

	/**
	 * Offsets of the cross-reference sections visited during the current
	 * scan; used to stop on circular /Prev chains.
	 */
	private $xref_seen = array();

	/**
	 * Parse a PDF document held in memory.
	 *
	 * @param $data (string) Raw PDF document content.
	 */
	public function __construct($data) {
		if (empty($data)) {
			$this->Error('Empty PDF data.');
		}
		$this->pdfdata = $data;
		$this->FilterDecoders = new TCPDF_FILTERS();
		// read the cross-reference table(s) and the trailer
		$this->xref = $this->getXrefData();
		// parse every in-use object referenced by the cross-reference data
		$this->objects = array();
		if (isset($this->xref['xref'])) {
			foreach ($this->xref['xref'] as $obj => $offset) {
				// an object may already have been loaded through getObjectVal()
				if (!isset($this->objects[$obj])) {
					$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
				}
			}
		}
		// the raw document is no longer needed: release it
		$this->pdfdata = '';
	}

	/**
	 * Return the result of the parsing.
	 *
	 * @return (array) Two items: the cross-reference data and the parsed objects.
	 */
	public function getParsedData() {
		return array($this->xref, $this->objects);
	}

	/**
	 * Read the cross-reference table and trailer, following /Prev links.
	 *
	 * @param $offset (int) 0 to start from the last "startxref" of the
	 *        document; otherwise the byte offset of a cross-reference section
	 *        (this is what a trailer /Prev entry contains).
	 * @param $xref (array) Cross-reference data collected so far; existing
	 *        entries are never overwritten, so the newest section wins.
	 * @return (array) Cross-reference data with keys 'xref' and 'trailer'.
	 */
	protected function getXrefData($offset=0, $xref=array()) {
		$offset = intval($offset);
		if ($offset <= 0) {
			// top-level call: start a fresh scan
			$offset = 0;
			$this->xref_seen = array();
		}
		if (!isset($xref['xref'])) {
			$xref['xref'] = array();
		}
		if ($offset == 0) {
			// locate the last "startxref" of the document
			if (!preg_match_all('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_SET_ORDER)) {
				$this->Error('Unable to find startxref');
				return $xref;
			}
			$last = array_pop($matches);
			$startxref = intval($last[1]);
		} else {
			// the caller already knows where the section starts
			$startxref = $offset;
		}
		// the section must begin with the "xref" keyword
		// (substr never fails on an out-of-range offset, unlike strpos)
		if (substr($this->pdfdata, $startxref, 4) !== 'xref') {
			$this->Error('Unable to find xref');
			return $xref;
		}
		if (isset($this->xref_seen[$startxref])) {
			// circular /Prev chain: this section has already been processed
			return $xref;
		}
		$this->xref_seen[$startxref] = true;
		// read the entries; each match is anchored (\G) to the current
		// position so that the scan cannot run past the end of the table
		$offset = $startxref + 4;
		$obj_num = 0;
		while (true) {
			// skip the end-of-line marker, whatever its length
			$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
			if (preg_match('/\G([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) !== 1) {
				break;
			}
			$offset = ($matches[0][1] + strlen($matches[0][0]));
			$flag = $matches[3][0];
			if ($flag == 'n') {
				// in-use entry: "offset generation n"
				$index = $obj_num.'_'.intval($matches[2][0]);
				if (!isset($xref['xref'][$index])) {
					// keep the entry of the most recent section
					$xref['xref'][$index] = intval($matches[1][0]);
				}
				++$obj_num;
			} elseif ($flag == 'f') {
				// free entry
				++$obj_num;
			} else {
				// subsection header: "first_object_number count"
				$obj_num = intval($matches[1][0]);
			}
		}
		// read the trailer that follows the table
		if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			$trailer_data = $matches[1][0];
			if (!isset($xref['trailer'])) {
				// only the first trailer encountered (the newest one) is stored
				$xref['trailer'] = array();
				if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['size'] = intval($matches[1]);
				}
				if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['id'] = array();
					$xref['trailer']['id'][0] = $matches[1];
					$xref['trailer']['id'][1] = $matches[2];
				}
			}
			if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
				// merge the entries of the previous cross-reference section
				$prev = intval($matches[1]);
				if ($prev > 0) {
					$xref = $this->getXrefData($prev, $xref);
				}
			}
		} else {
			$this->Error('Unable to find trailer');
		}
		return $xref;
	}

	/**
	 * Read one element (token or container) starting at the given offset.
	 *
	 * @param $offset (int) Position in the document where to start reading.
	 * @return (array) Element as array(type, value, offset after the element).
	 *         When nothing can be read (end of data or unknown token) the type
	 *         is an empty string and the returned offset does not move past
	 *         the unreadable byte.
	 */
	protected function getRawObject($offset=0) {
		$objtype = '';
		$objval = '';
		$pdflen = strlen($this->pdfdata);
		if (($offset < 0) || ($offset >= $pdflen)) {
			// nothing to read
			return array($objtype, $objval, $offset);
		}
		// skip initial white-space characters: \x00 null (NUL), \x09 horizontal tab (HT),
		// \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		if ($offset >= $pdflen) {
			return array($objtype, $objval, $offset);
		}
		$char = $this->pdfdata[$offset];
		switch ($char) {
			case '%': {
				// comment: skip to the end of the line and read what follows
				$next = strcspn($this->pdfdata, "\r\n", $offset);
				return $this->getRawObject($offset + $next);
			}
			case '/': {
				// name object
				$objtype = $char;
				++$offset;
				if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
					$objval = $matches[1];
					$offset += strlen($objval);
				}
				break;
			}
			case '(':
			case ')': {
				// literal string object
				$objtype = $char;
				++$offset;
				$strpos = $offset;
				if ($char == '(') {
					// scan up to the matching closing parenthesis
					$open_bracket = 1;
					while ($open_bracket > 0) {
						if (!isset($this->pdfdata[$strpos])) {
							// unterminated string
							break;
						}
						$ch = $this->pdfdata[$strpos];
						switch ($ch) {
							case '\\': {
								// escape character: skip the next byte
								++$strpos;
								break;
							}
							case '(': {
								++$open_bracket;
								break;
							}
							case ')': {
								--$open_bracket;
								break;
							}
						}
						++$strpos;
					}
					// raw content between the delimiters (escapes are not decoded)
					$objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
					$offset = $strpos;
				}
				break;
			}
			case '[':
			case ']': {
				// array object
				$objtype = $char;
				++$offset;
				if ($char == '[') {
					$objval = array();
					do {
						$oldoffset = $offset;
						$element = $this->getRawObject($offset);
						$offset = $element[2];
						$objval[] = $element;
						// stop on the closing delimiter, or when no progress is
						// possible (truncated data or unrecognised token)
					} while (($element[0] !== ']') && ($offset != $oldoffset));
					// remove the closing delimiter (or the unreadable element)
					array_pop($objval);
				}
				break;
			}
			case '<':
			case '>': {
				if (isset($this->pdfdata[($offset + 1)]) && ($this->pdfdata[($offset + 1)] == $char)) {
					// dictionary object
					$objtype = $char.$char;
					$offset += 2;
					if ($char == '<') {
						$objval = array();
						do {
							$oldoffset = $offset;
							$element = $this->getRawObject($offset);
							$offset = $element[2];
							$objval[] = $element;
						} while (($element[0] !== '>>') && ($offset != $oldoffset));
						// remove the closing delimiter (or the unreadable element)
						array_pop($objval);
					}
				} else {
					// hexadecimal string object
					$objtype = $char;
					++$offset;
					// \G anchors the match at $offset without copying the rest of the document
					if (($char == '<') && (preg_match('/\G([0-9A-Fa-f]+)[>]/iU', $this->pdfdata, $matches, 0, $offset) == 1)) {
						$objval = $matches[1];
						$offset += strlen($matches[0]);
					}
				}
				break;
			}
			default: {
				// keywords, references and numbers
				$head = substr($this->pdfdata, $offset, 33);
				if (strncmp($head, 'endobj', 6) === 0) {
					// indirect object end
					$objtype = 'endobj';
					$offset += 6;
				} elseif (strncmp($head, 'null', 4) === 0) {
					// null object
					$objtype = 'null';
					$offset += 4;
					$objval = 'null';
				} elseif (strncmp($head, 'true', 4) === 0) {
					// boolean true object
					$objtype = 'boolean';
					$offset += 4;
					$objval = 'true';
				} elseif (strncmp($head, 'false', 5) === 0) {
					// boolean false object
					$objtype = 'boolean';
					$offset += 5;
					$objval = 'false';
				} elseif (strncmp($head, 'stream', 6) === 0) {
					// start stream object: the value is everything up to "endstream"
					$objtype = 'stream';
					$offset += 6;
					if (preg_match('/\G[\r\n]+(.*)[\r\n]*endstream/isU', $this->pdfdata, $matches, 0, $offset) == 1) {
						$objval = $matches[1];
						$offset += strlen($matches[0]);
					}
				} elseif (strncmp($head, 'endstream', 9) === 0) {
					// end stream object
					$objtype = 'endstream';
					$offset += 9;
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', $head, $matches) == 1) {
					// indirect object reference; the value is "objnum_gennum"
					$objtype = 'objref';
					$offset += strlen($matches[0]);
					$objval = intval($matches[1]).'_'.intval($matches[2]);
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', $head, $matches) == 1) {
					// indirect object header; the value is "objnum_gennum"
					$objtype = 'obj';
					$objval = intval($matches[1]).'_'.intval($matches[2]);
					$offset += strlen($matches[0]);
				} elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
					// numeric object
					$objtype = 'numeric';
					$objval = substr($this->pdfdata, $offset, $numlen);
					$offset += $numlen;
				}
				break;
			}
		}
		return array($objtype, $objval, $offset);
	}

	/**
	 * Read the content of an indirect object.
	 *
	 * @param $obj_ref (string) Object reference in the form "objnum_gennum".
	 * @param $offset (int) Byte offset of the object header ("N G obj").
	 * @param $decoding (boolean) If true, decode the streams found in the object.
	 * @return (array) List of the elements that make up the object. When the
	 *         header is not found at $offset, a single null element
	 *         array('null', 'null', $offset) is returned instead.
	 */
	protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
		$obj = explode('_', $obj_ref);
		if (($obj === false) || (count($obj) != 2)) {
			$this->Error('Invalid object reference: '.$obj_ref);
			return;
		}
		$objref = $obj[0].' '.$obj[1].' obj';
		// compare only the bytes at $offset: no scan of the whole document,
		// and no failure when the offset lies outside of it
		if (substr($this->pdfdata, $offset, strlen($objref)) !== $objref) {
			// an indirect reference to an undefined object shall be considered a reference to the null object
			return array('null', 'null', $offset);
		}
		// starting position of the object content
		$offset += strlen($objref);
		$objdata = array();
		$i = 0;
		do {
			$oldoffset = $offset;
			$element = $this->getRawObject($offset);
			$offset = $element[2];
			// a stream is decoded using the dictionary that precedes it
			if ($decoding && ($element[0] == 'stream') && (isset($objdata[($i - 1)][0])) && ($objdata[($i - 1)][0] == '<<')) {
				$element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
			}
			$objdata[$i] = $element;
			++$i;
			// stop on "endobj", or when no progress is possible
			// (truncated data or unrecognised token)
		} while (($element[0] != 'endobj') && ($offset != $oldoffset));
		// remove the closing delimiter (or the unreadable element)
		array_pop($objdata);
		return $objdata;
	}

	/**
	 * Resolve an element: references are replaced by the referenced object.
	 *
	 * @param $obj (array) Element as returned by getRawObject().
	 * @return (array) For an 'objref' element whose target is known, the value
	 *         returned by getIndirectObject() for that target (it is also
	 *         cached in $this->objects); otherwise $obj unchanged.
	 */
	protected function getObjectVal($obj) {
		if ($obj[0] == 'objref') {
			$ref = $obj[1];
			if (isset($this->objects[$ref])) {
				// the object has already been parsed
				return $this->objects[$ref];
			}
			if (isset($this->xref['xref'][$ref])) {
				// parse the object now, without decoding its streams
				$this->objects[$ref] = $this->getIndirectObject($ref, $this->xref['xref'][$ref], false);
				return $this->objects[$ref];
			}
		}
		return $obj;
	}

	/**
	 * Decode a stream according to its dictionary.
	 *
	 * @param $sdic (array) Elements of the stream dictionary.
	 * @param $stream (string) Raw stream content.
	 * @return (array) Two items: the stream after decoding, and the list of
	 *         filters that still have to be applied to it (in order).
	 */
	protected function decodeStream($sdic, $stream) {
		$slength = strlen($stream);
		$filters = array();
		foreach ($sdic as $k => $v) {
			if ($v[0] == '/') {
				if (($v[1] == 'Length') && (isset($sdic[($k + 1)])) && ($sdic[($k + 1)][0] == 'numeric')) {
					// cut the stream to the declared length
					$declength = intval($sdic[($k + 1)][1]);
					if ($declength < $slength) {
						$stream = substr($stream, 0, $declength);
						$slength = $declength;
					}
				} elseif (($v[1] == 'Filter') && (isset($sdic[($k + 1)]))) {
					// the filter may be a name, an array of names, or a reference to one of these
					$objval = $this->getObjectVal($sdic[($k + 1)]);
					if (isset($objval[0]) && is_array($objval[0])) {
						// a resolved reference is a list of elements: use the first one
						$objval = $objval[0];
					}
					if ($objval[0] == '/') {
						// single filter
						$filters[] = $objval[1];
					} elseif ($objval[0] == '[') {
						// array of filters
						foreach ($objval[1] as $flt) {
							if ($flt[0] == '/') {
								$filters[] = $flt[1];
							}
						}
					}
				}
			}
		}
		// Filters must be applied in the listed order, so decoding stops at
		// the first filter that is not available: that filter and all the
		// following ones are returned as still to be applied.
		$available = $this->FilterDecoders->getAvailableFilters();
		$remaining_filters = array();
		foreach ($filters as $filter) {
			if (empty($remaining_filters) && in_array($filter, $available)) {
				$stream = $this->FilterDecoders->decodeFilter($filter, $stream);
			} else {
				$remaining_filters[] = $filter;
			}
		}
		return array($stream, $remaining_filters);
	}

	/**
	 * Print the error message and terminate the script.
	 *
	 * @param $msg (string) Error message.
	 */
	public function Error($msg) {
		// the execution stops here
		die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
	}
}
|