Du kannst nicht mehr als 25 Themen auswählen Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

pdf_parser.php 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925
  1. <?php
  2. /**
  3. * This file is part of FPDI
  4. *
  5. * @package FPDI
  6. * @copyright Copyright (c) 2015 Setasign - Jan Slabon (http://www.setasign.com)
  7. * @license http://opensource.org/licenses/mit-license The MIT License
  8. * @version 1.6.1
  9. */
  10. /**
  11. * Class pdf_parser
  12. */
  13. class pdf_parser
  14. {
  15. /**
  16. * Type constant
  17. *
  18. * @var integer
  19. */
  20. const TYPE_NULL = 0;
  21. /**
  22. * Type constant
  23. *
  24. * @var integer
  25. */
  26. const TYPE_NUMERIC = 1;
  27. /**
  28. * Type constant
  29. *
  30. * @var integer
  31. */
  32. const TYPE_TOKEN = 2;
  33. /**
  34. * Type constant
  35. *
  36. * @var integer
  37. */
  38. const TYPE_HEX = 3;
  39. /**
  40. * Type constant
  41. *
  42. * @var integer
  43. */
  44. const TYPE_STRING = 4;
  45. /**
  46. * Type constant
  47. *
  48. * @var integer
  49. */
  50. const TYPE_DICTIONARY = 5;
  51. /**
  52. * Type constant
  53. *
  54. * @var integer
  55. */
  56. const TYPE_ARRAY = 6;
  57. /**
  58. * Type constant
  59. *
  60. * @var integer
  61. */
  62. const TYPE_OBJDEC = 7;
  63. /**
  64. * Type constant
  65. *
  66. * @var integer
  67. */
  68. const TYPE_OBJREF = 8;
  69. /**
  70. * Type constant
  71. *
  72. * @var integer
  73. */
  74. const TYPE_OBJECT = 9;
  75. /**
  76. * Type constant
  77. *
  78. * @var integer
  79. */
  80. const TYPE_STREAM = 10;
  81. /**
  82. * Type constant
  83. *
  84. * @var integer
  85. */
  86. const TYPE_BOOLEAN = 11;
  87. /**
  88. * Type constant
  89. *
  90. * @var integer
  91. */
  92. const TYPE_REAL = 12;
  93. /**
  94. * Defines the number of bytes, counted from the end of the file, in which the "startxref" keyword of a PDF document is searched.
  95. *
  96. * @var int
  97. */
  98. static public $searchForStartxrefLength = 5500;
  99. /**
  100. * Filename
  101. *
  102. * @var string
  103. */
  104. public $filename;
  105. /**
  106. * File resource
  107. *
  108. * @var resource
  109. */
  110. protected $_f;
  111. /**
  112. * PDF Context
  113. *
  114. * @var pdf_context
  115. */
  116. protected $_c;
  117. /**
  118. * xref-Data
  119. *
  120. * @var array
  121. */
  122. protected $_xref;
  123. /**
  124. * Data of the Root object
  125. *
  126. * @var array
  127. */
  128. protected $_root;
  129. /**
  130. * PDF version of the loaded document
  131. *
  132. * @var string
  133. */
  134. protected $_pdfVersion;
  135. /**
  136. * Flag relevant when reading encrypted documents and when xref/object streams are in use
  137. *
  138. * @var boolean
  139. */
  140. protected $_readPlain = true;
  141. /**
  142. * The current read object
  143. *
  144. * @var array
  145. */
  146. protected $_currentObj;
  /**
   * Opens the PDF file and parses its basic structure.
   *
   * Performed in this order: open the file for binary reading, detect the
   * PDF version, create the parsing context, read the cross-reference data
   * starting at the offset reported by _findXref(), reject encrypted
   * documents and finally resolve the /Root object.
   *
   * @param string $filename Source filename
   * @throws InvalidArgumentException If the file cannot be opened
   * @throws Exception If a later parsing step fails (xref data, encryption check, /Root)
   */
  public function __construct($filename)
  {
      $this->filename = $filename;

      // Warnings are suppressed on purpose; a failed open is reported via the exception below.
      $handle = @fopen($filename, 'rb');
      $this->_f = $handle;
      if (!$handle) {
          throw new InvalidArgumentException(sprintf('Cannot open %s !', $filename));
      }

      // Result is cached in $this->_pdfVersion.
      $this->getPdfVersion();

      // Load the context class lazily if nothing (e.g. an autoloader) provided it yet.
      if (!class_exists('pdf_context')) {
          require_once('pdf_context.php');
      }
      $this->_c = new pdf_context($handle);

      // Collect the cross-reference data, following the pointer found at the end of the file.
      $this->_xref = array();
      $xrefOffset = $this->_findXref();
      $this->_readXref($this->_xref, $xrefOffset);

      // Encrypted documents are rejected here.
      $this->getEncryption();

      $this->_readRoot();
  }
  /**
   * Destructor.
   *
   * Delegates to closeFile() so the file handle is released together with the object.
   */
  public function __destruct()
  {
      $this->closeFile();
  }
  /**
   * Closes the file handle if it is still open.
   *
   * Does nothing when the handle property is unset or no longer a resource,
   * so calling it repeatedly is harmless. After closing, the property itself
   * is removed from the object.
   */
  public function closeFile()
  {
      if (!isset($this->_f) || !is_resource($this->_f)) {
          return;
      }

      fclose($this->_f);
      unset($this->_f);
  }
  /**
   * Checks the trailer dictionary for an /Encrypt entry.
   *
   * Despite its name this method returns nothing; it only throws when the
   * trailer read into the xref data contains an /Encrypt key.
   *
   * @throws Exception If the trailer declares an /Encrypt entry
   */
  public function getEncryption()
  {
      $isEncrypted = isset($this->_xref['trailer'][1]['/Encrypt']);

      if ($isEncrypted) {
          throw new Exception('File is encrypted!');
      }
  }
  /**
   * Returns the PDF version of the loaded document.
   *
   * On first use the first 16 bytes of the file are read and searched for a
   * "digit.digit" pattern; the match is cached. If nothing matches, the
   * cached value stays null and the next call tries again.
   *
   * @return string|null Version such as "1.4", or null if none was found
   */
  public function getPdfVersion()
  {
      if ($this->_pdfVersion !== null) {
          return $this->_pdfVersion;
      }

      fseek($this->_f, 0);
      $header = fread($this->_f, 16);

      if (preg_match('/\d\.\d/', $header, $found)) {
          $this->_pdfVersion = $found[0];
      }

      return $this->_pdfVersion;
  }
  /**
   * Reads the /Root dictionary referenced by the trailer.
   *
   * The trailer's /Root entry has to be an indirect reference. The entry is
   * checked with isset() first, so a missing or unparsable trailer leads to
   * the exception below instead of undefined index/offset notices.
   *
   * @throws Exception If /Root is missing or is not an indirect reference,
   *                   or if the referenced object cannot be resolved
   */
  protected function _readRoot()
  {
      $hasRootReference = isset($this->_xref['trailer'][1]['/Root'][0])
          && $this->_xref['trailer'][1]['/Root'][0] == self::TYPE_OBJREF;

      if (!$hasRootReference) {
          throw new Exception('Wrong Type of Root-Element! Must be an indirect reference');
      }

      $this->_root = $this->resolveObject($this->_xref['trailer'][1]['/Root']);
  }
  /**
   * Finds the byte offset of the xref table.
   *
   * Reads the last self::$searchForStartxrefLength bytes of the file (or the
   * whole file if seeking that far back fails), locates the last occurrence
   * of the "startxref" keyword - falling back to the misspelling "startref" -
   * and returns the first number that follows it.
   *
   * @return integer Offset read after the keyword
   * @throws Exception If neither keyword nor a following number is found
   */
  protected function _findXref()
  {
      $windowSize = self::$searchForStartxrefLength;

      // Seeking before the start of the file fails; start at the beginning instead.
      if (fseek($this->_f, -$windowSize, SEEK_END) === -1) {
          fseek($this->_f, 0);
      }
      $tail = fread($this->_f, $windowSize);

      // Position directly behind the last keyword occurrence.
      $afterKeyword = false;
      foreach (array('startxref', 'startref') as $keyword) {
          $keywordStart = strrpos($tail, $keyword);
          if ($keywordStart !== false) {
              $afterKeyword = $keywordStart + strlen($keyword);
              break;
          }
      }

      if ($afterKeyword === false) {
          throw new Exception('Unable to find "startxref" keyword.');
      }

      $remainder = substr($tail, $afterKeyword);
      if (!preg_match('/\s*(\d+).*$/s', $remainder, $matches)) {
          throw new Exception('Unable to find pointer to xref table.');
      }

      return (int) $matches[1];
  }
  /**
   * Reads an xref table and, recursively, every table linked via /Prev.
   *
   * Fills $result with:
   *  - 'xrefLocation' and 'maxObject' (initialised on the first table only),
   *  - 'xref'[object number][generation] => byte offset, or null for entries
   *    not flagged "n"; entries that already exist are never overwritten, so
   *    tables read first take precedence over those reached through /Prev,
   *  - 'trailer' => the trailer value of the first table read.
   *
   * @param array $result Array of xref table entries (filled by reference)
   * @param integer $offset Offset of the xref table
   * @return boolean Always true; failures are reported by exceptions
   * @throws Exception If no xref table or trailer is found or the table is malformed
   */
  protected function _readXref(&$result, $offset)
  {
      // Begin up to 20 bytes in front of the given offset to cope with
      // slightly wrong pointers in corrupted documents.
      $windowStart = $offset - min(20, $offset);
      fseek($this->_f, $windowStart);
      $window = fread($this->_f, 100);

      $keywordPos = strrpos($window, 'xref');
      if (false === $keywordPos) {
          // No classic table here. If an object declaration is found instead,
          // report the unsupported compression; otherwise a generic error.
          $this->_c->reset($offset);
          $candidate = $this->_readValue($this->_c);
          $isObjectDeclaration = is_array($candidate)
              && isset($candidate[0])
              && $candidate[0] == self::TYPE_OBJDEC;

          if ($isObjectDeclaration) {
              throw new Exception(
                  sprintf(
                      'This document (%s) probably uses a compression technique which is not supported by the ' .
                      'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
                      $this->filename
                  )
              );
          }

          throw new Exception('Unable to find xref table.');
      }

      if (!isset($result['xrefLocation'])) {
          $result['xrefLocation'] = $windowStart + $keywordPos;
          $result['maxObject'] = 0;
      }

      // Position the handle directly behind the "xref" keyword (4 bytes).
      $tableStart = $windowStart + $keywordPos + 4;
      fseek($this->_f, $tableStart);

      // Read in chunks until the "trailer" keyword shows up or the file ends.
      // The search offset lags behind the data read so far, so a keyword
      // split across two chunks is still found.
      $chunkSize = 100;
      $buffer = fread($this->_f, $chunkSize);
      $round = -1;
      for (;;) {
          $searchFrom = max($chunkSize * $round, 0);
          $round++;
          $trailerPos = strpos($buffer, 'trailer', $searchFrom);
          if ($trailerPos !== false || feof($this->_f)) {
              break;
          }
          $buffer .= fread($this->_f, $chunkSize);
      }

      if ($trailerPos === false) {
          throw new Exception('Trailer keyword not found after xref table');
      }

      $table = ltrim(substr($buffer, 0, $trailerPos));

      // Detect the line ending from the first 100 bytes of the table.
      $found = preg_match_all("/(\r\n|\n|\r)/", substr($table, 0, 100), $lineEndings);
      if ($found === 0) {
          throw new Exception('Xref table seems to be corrupted.');
      }

      if (count(array_unique($lineEndings[0])) > 1) {
          // Mixed line endings: split on any of them.
          $lines = preg_split("/(\r\n|\n|\r)/", $table, -1, PREG_SPLIT_NO_EMPTY);
      } else {
          $lines = explode($lineEndings[0][0], $table);
      }
      unset($buffer, $table, $lineEndings);

      $objectNumber = 1;
      foreach ($lines as $rawLine) {
          $line = trim($rawLine);
          if (!$line) {
              continue;
          }

          $pieces = explode(' ', $line);
          switch (count($pieces)) {
              case 2:
                  // Subsection header: "<first object number> <entry count>"
                  $objectNumber = (int) $pieces[0];
                  $end = $objectNumber + (int) $pieces[1];
                  if ($end > $result['maxObject']) {
                      $result['maxObject'] = $end;
                  }
                  break;

              case 3:
                  // Entry: "<offset> <generation> <flag>"
                  if (!isset($result['xref'][$objectNumber])) {
                      $result['xref'][$objectNumber] = array();
                  }
                  $generation = (int) $pieces[1];
                  if (!array_key_exists($generation, $result['xref'][$objectNumber])) {
                      $result['xref'][$objectNumber][$generation] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
                  }
                  $objectNumber++;
                  break;

              default:
                  throw new Exception('Unexpected data in xref table');
          }
      }
      // Release the line data before a possible recursion.
      unset($lines);

      // The trailer value follows the "trailer" keyword (7 bytes).
      $this->_c->reset($tableStart + $trailerPos + 7);
      $trailer = $this->_readValue($this->_c);

      if (!isset($result['trailer'])) {
          $result['trailer'] = $trailer;
      }

      if (isset($trailer[1]['/Prev'])) {
          $this->_readXref($result, $trailer[1]['/Prev'][1]);
      }

      return true;
  }
  /**
   * Reads a PDF value from the context.
   *
   * The result is an array whose first element is one of the TYPE_*
   * constants, followed by the payload: hex and literal strings as raw text,
   * dictionaries as key => value arrays, arrays as value lists, object
   * declarations/references as object number and generation, streams as
   * their raw data, numbers as int or float, booleans as bool. TYPE_NULL
   * carries no payload; any other token is returned as TYPE_TOKEN.
   *
   * @param pdf_context $c
   * @param string $token A token; read from the context when omitted
   * @return mixed Value array, or false if the data ends prematurely
   * @throws Exception If no newline follows a "stream" keyword
   */
  protected function _readValue(&$c, $token = null)
  {
      if (null === $token) {
          $token = $this->_readToken($c);
      }

      if (false === $token) {
          return false;
      }

      switch ($token) {
          case '<':
              // Hex string: everything up to the closing ">".
              $searchFrom = $c->offset;
              for (;;) {
                  $closing = strpos($c->buffer, '>', $searchFrom);
                  if ($closing !== false) {
                      $hex = substr($c->buffer, $c->offset, $closing - $c->offset);
                      $c->offset = $closing + 1;

                      return array(self::TYPE_HEX, $hex);
                  }

                  // Terminator not buffered yet: load more data or give up.
                  if (!$c->increaseLength()) {
                      return false;
                  }
              }

          case '<<':
              // Dictionary: alternating keys and values until ">>".
              $entries = array();
              for (;;) {
                  $key = $this->_readToken($c);
                  if ($key === '>>') {
                      break;
                  }
                  if ($key === false) {
                      return false;
                  }

                  $value = $this->_readValue($c);
                  if ($value === false) {
                      return false;
                  }

                  // A key directly followed by ">>" has no value: store null and stop.
                  if ($value[0] == self::TYPE_TOKEN && $value[1] == '>>') {
                      $entries[$key] = array(self::TYPE_NULL);
                      break;
                  }

                  $entries[$key] = $value;
              }

              return array(self::TYPE_DICTIONARY, $entries);

          case '[':
              // Array: values until "]".
              $items = array();
              for (;;) {
                  $next = $this->_readToken($c);
                  if ($next === ']') {
                      break;
                  }
                  if ($next === false) {
                      return false;
                  }

                  $value = $this->_readValue($c, $next);
                  if ($value === false) {
                      return false;
                  }

                  $items[] = $value;
              }

              return array(self::TYPE_ARRAY, $items);

          case '(':
              // Literal string: track nested parentheses, skip escaped characters.
              $cursor = $c->offset;
              $depth = 1;
              do {
                  while ($depth != 0 && $cursor < $c->length) {
                      $char = $c->buffer[$cursor];
                      if ($char === '(') {
                          $depth++;
                      } elseif ($char === ')') {
                          $depth--;
                      } elseif ($char === '\\') {
                          // Skip the character following the backslash.
                          $cursor++;
                      }
                      $cursor++;
                  }
              } while ($depth != 0 && $c->increaseLength());

              // $cursor sits one past the closing parenthesis.
              $text = substr($c->buffer, $c->offset, $cursor - $c->offset - 1);
              $c->offset = $cursor;

              return array(self::TYPE_STRING, $text);

          case 'stream':
              // Absolute file position directly behind the "stream" keyword.
              $startPos = $c->getPos() - strlen($c->buffer) + $c->offset;
              $c->reset($startPos);

              // Advance byte by byte to the first line break.
              while ($c->buffer[0] !== "\n" && $c->buffer[0] !== "\r") {
                  $c->reset(++$startPos);
                  if ($c->ensureContent() === false) {
                      throw new Exception(
                          'Unable to parse stream data. No newline followed the stream keyword.'
                      );
                  }
              }

              // Number of line-break bytes (1 or 2) in front of the stream data.
              $eolLength = 0;
              if ($c->buffer[0] == "\n" || $c->buffer[0] == "\r") {
                  $eolLength++;
              }
              if ($c->buffer[1] == "\n" && $c->buffer[0] != "\n") {
                  $eolLength++;
              }

              // /Length of the object currently being read; may be an indirect reference.
              $lengthSpec = $this->_currentObj[1][1]['/Length'];
              if ($lengthSpec[0] == self::TYPE_OBJREF) {
                  $resolvedLength = $this->resolveObject($lengthSpec);
                  $length = $resolvedLength[1][1];
              } else {
                  $length = $lengthSpec[1];
              }

              $dataStart = $startPos + $eolLength;
              if ($length > 0) {
                  $c->reset($dataStart, $length);
                  $data = $c->buffer;
              } else {
                  $data = '';
              }

              $c->reset($dataStart + $length);
              $endstream = $this->_readToken($c);
              if ($endstream != 'endstream') {
                  // No exception here: reading simply continues behind the
                  // expected keyword position (9 = strlen("endstream")).
                  $c->reset($dataStart + $length + 9);
              }

              return array(self::TYPE_STREAM, $data);

          default:
              if (is_numeric($token)) {
                  // Look ahead: "<num> <num> obj" and "<num> <num> R" are an
                  // object declaration and an object reference respectively.
                  $second = $this->_readToken($c);
                  if ($second !== false) {
                      if (is_numeric($second)) {
                          $third = $this->_readToken($c);
                          if ($third !== false) {
                              if ($third == 'obj') {
                                  return array(self::TYPE_OBJDEC, (int) $token, (int) $second);
                              }
                              if ($third == 'R') {
                                  return array(self::TYPE_OBJREF, (int) $token, (int) $second);
                              }

                              // Plain number after all: hand the look-ahead tokens back.
                              array_push($c->stack, $third);
                          }
                      }
                      array_push($c->stack, $second);
                  }

                  // Integer if the token round-trips through an int cast, real otherwise.
                  $isInteger = $token === (string) ((int) $token);

                  return $isInteger
                      ? array(self::TYPE_NUMERIC, (int) $token)
                      : array(self::TYPE_REAL, (float) $token);
              }

              if ($token == 'true' || $token == 'false') {
                  return array(self::TYPE_BOOLEAN, $token == 'true');
              }

              if ($token == 'null') {
                  return array(self::TYPE_NULL);
              }

              // Anything else is returned as a bare token.
              return array(self::TYPE_TOKEN, $token);
      }
  }
  /**
   * Resolves an object specification.
   *
   * Anything that is not an indirect reference is returned unchanged. For a
   * reference the object is looked up in the xref data and read until
   * "endobj"; the result is array(TYPE_OBJECT, 'obj' => number,
   * 'gen' => generation, value, ...). When a stream follows the first value,
   * element 0 is changed to TYPE_STREAM. The context position is restored
   * afterwards, so references can be resolved while another object is read.
   *
   * @param array $objSpec The object-data
   * @return array|boolean The resolved data, or false if $objSpec is not an array
   * @throws Exception If the object is not listed in the xref data or not
   *                   found at the listed position
   */
  public function resolveObject($objSpec)
  {
      $c = $this->_c;

      if (!is_array($objSpec)) {
          return false;
      }

      // Direct values need no resolving.
      if ($objSpec[0] != self::TYPE_OBJREF) {
          return $objSpec;
      }

      if (!isset($this->_xref['xref'][$objSpec[1]][$objSpec[2]])) {
          throw new Exception(
              sprintf("Unable to find object (%s, %s) at expected location.", $objSpec[1], $objSpec[2])
          );
      }

      // Remember where the context currently is; it is restored below.
      $oldPos = $c->getPos();

      // Jump to the object and read its header ("<number> <generation> obj").
      $c->reset($this->_xref['xref'][$objSpec[1]][$objSpec[2]]);
      $header = $this->_readValue($c);

      $headerMatches = $header[0] == self::TYPE_OBJDEC
          && $header[1] == $objSpec[1]
          && $header[2] == $objSpec[2];

      if (!$headerMatches) {
          // The offset is slightly off: search the buffered data for the header.
          $toSearchFor = $objSpec[1] . ' ' . $objSpec[2] . ' obj';
          if (!preg_match('/' . $toSearchFor . '/', $c->buffer)) {
              throw new Exception(
                  sprintf("Unable to find object (%s, %s) at expected location.", $objSpec[1], $objSpec[2])
              );
          }

          $c->offset = strpos($c->buffer, $toSearchFor) + strlen($toSearchFor);
          // Drop tokens pushed back while reading the wrong header.
          $c->stack = array();
      }

      $result = array(
          self::TYPE_OBJECT,
          'obj' => $objSpec[1],
          'gen' => $objSpec[2]
      );
      // Bound by reference: _readValue() reads /Length of the object being
      // built from $this->_currentObj when it meets a stream.
      $this->_currentObj =& $result;

      // Collect values until "endobj"; stop as well when the data ends or
      // more than four elements were collected (no "endobj" found).
      do {
          $value = $this->_readValue($c);
          $finished = $value === false
              || count($result) > 4
              || ($value[0] == self::TYPE_TOKEN && $value[1] === 'endobj');

          if (!$finished) {
              $result[] = $value;
          }
      } while (!$finished);

      $c->reset($oldPos);

      if (isset($result[2][0]) && $result[2][0] == self::TYPE_STREAM) {
          $result[0] = self::TYPE_STREAM;
      }

      return $result;
  }
  /**
   * Reads a token from the context.
   *
   * Tokens pushed back onto the context stack are returned first. Otherwise
   * leading whitespace is skipped and one of the following is returned:
   * a single delimiter ("[", "]", "(", ")", "<", ">"), a double delimiter
   * ("<<", ">>") or a run of characters up to the next whitespace or
   * delimiter. Comments ("%" up to the end of the line) are skipped.
   *
   * Fix compared to the previous implementation: when a regular token reaches
   * the end of the buffered data and the buffer cannot be enlarged any
   * further, the token ending at the buffer end is returned. Previously the
   * result of increaseLength() was ignored at this point and the loop never
   * terminated.
   *
   * @param pdf_context $c
   * @return mixed The token as string, or false if no more content is available
   */
  protected function _readToken($c)
  {
      // Tokens handed back by _readValue() take precedence.
      if (count($c->stack)) {
          return array_pop($c->stack);
      }

      // Strip away any whitespace
      do {
          if (!$c->ensureContent()) {
              return false;
          }
          $c->offset += strspn($c->buffer, "\x20\x0A\x0C\x0D\x09\x00", $c->offset);
      } while ($c->offset >= $c->length - 1);

      // First character of the token; the offset moves behind it.
      $char = $c->buffer[$c->offset++];

      switch ($char) {
          case '[':
          case ']':
          case '(':
          case ')':
              // Array or literal string delimiter
              return $char;

          case '<':
          case '>':
              // Either a hex string delimiter or, when doubled, a dictionary delimiter
              if ($c->buffer[$c->offset] == $char) {
                  if (!$c->ensureContent()) {
                      return false;
                  }
                  $c->offset++;

                  return $char . $char;
              }

              return $char;

          case '%':
              // Comment: continue behind the next line break
              $pos = $c->offset;
              while (1) {
                  $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos);
                  if ($match === 0) {
                      // Line break not buffered yet: load more data or give up.
                      if (!$c->increaseLength()) {
                          return false;
                      }
                      continue;
                  }

                  $c->offset = $m[0][1] + strlen($m[0][0]);

                  return $this->_readToken($c);
              }

          default:
              // Any other token (e.g. a name, keyword or number)
              if (!$c->ensureContent()) {
                  return false;
              }

              while (1) {
                  // Length of the remaining token characters behind $char
                  $pos = strcspn($c->buffer, "\x20%[]<>()/\x0A\x0C\x0D\x09\x00", $c->offset);
                  if ($c->offset + $pos <= $c->length - 1) {
                      break;
                  }

                  // The token may continue beyond the buffered data, so try
                  // to load more. A false result is treated as "no more
                  // data", as at the other increaseLength() call sites in
                  // this class; the token then ends at the buffer end.
                  if (!$c->increaseLength()) {
                      break;
                  }
              }

              // $char was already consumed, hence "- 1" and "+ 1".
              $result = substr($c->buffer, $c->offset - 1, $pos + 1);
              $c->offset += $pos;

              return $result;
      }
  }
  /**
   * Un-filters a stream object.
   *
   * The /Filter entry of the stream dictionary may be a single name, an
   * array of names or an indirect reference to either. The filters are
   * applied in order to the raw data in $obj[2][1]. Handled filters:
   * /FlateDecode (/Fl), /LZWDecode, /ASCII85Decode and /ASCIIHexDecode.
   *
   * @param array $obj Stream object as returned by resolveObject()
   * @return string The decoded stream data
   * @throws Exception On an unsupported filter, missing zlib support or a
   *                   decompression error
   */
  protected function _unFilterStream($obj)
  {
      $filters = array();

      if (isset($obj[1][1]['/Filter'])) {
          $filter = $obj[1][1]['/Filter'];

          if ($filter[0] == self::TYPE_OBJREF) {
              $resolved = $this->resolveObject($filter);
              $filter = $resolved[1];
          }

          if ($filter[0] == self::TYPE_TOKEN) {
              // Single filter name
              $filters[] = $filter;
          } elseif ($filter[0] == self::TYPE_ARRAY) {
              // List of filter names
              $filters = $filter[1];
          }
      }

      $stream = $obj[2][1];

      foreach ($filters as $filter) {
          switch ($filter[1]) {
              case '/FlateDecode':
              case '/Fl':
                  if (!function_exists('gzuncompress')) {
                      throw new Exception(
                          sprintf('To handle %s filter, please compile php with zlib support.', $filter[1])
                      );
                  }

                  $compressed = $stream;
                  $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : '';

                  if ($stream === false) {
                      // Fallback: drop one leading byte per attempt (at most
                      // eight) and try a raw inflate on the remainder.
                      for (
                          $tries = 0;
                          $tries < 8 && ($stream === false || strlen($stream) < strlen($compressed));
                          $tries++
                      ) {
                          $compressed = substr($compressed, 1);
                          $stream = @gzinflate($compressed);
                      }

                      if ($stream === false) {
                          throw new Exception('Error while decompressing stream.');
                      }
                  }
                  break;

              case '/LZWDecode':
                  if (!class_exists('FilterLZW')) {
                      require_once('filters/FilterLZW.php');
                  }
                  $lzw = new FilterLZW();
                  $stream = $lzw->decode($stream);
                  break;

              case '/ASCII85Decode':
                  if (!class_exists('FilterASCII85')) {
                      require_once('filters/FilterASCII85.php');
                  }
                  $ascii85 = new FilterASCII85();
                  $stream = $ascii85->decode($stream);
                  break;

              case '/ASCIIHexDecode':
                  if (!class_exists('FilterASCIIHexDecode')) {
                      require_once('filters/FilterASCIIHexDecode.php');
                  }
                  $asciiHex = new FilterASCIIHexDecode();
                  $stream = $asciiHex->decode($stream);
                  break;

              case null:
                  // No filter name: leave the data as it is.
                  break;

              default:
                  throw new Exception(sprintf('Unsupported Filter: %s', $filter[1]));
          }
      }

      return $stream;
  }
  760. }