|
- <?php
- /**
- * This file is part of FPDI
- *
- * @package FPDI
- * @copyright Copyright (c) 2015 Setasign - Jan Slabon (http://www.setasign.com)
- * @license http://opensource.org/licenses/mit-license The MIT License
- * @version 1.6.1
- */
-
// Pull in the base parser unless the class is already defined; class_exists()
// is called with its default arguments, so a registered autoloader gets a
// chance to provide the class first.
if (class_exists('pdf_parser') === false) {
    require_once 'pdf_parser.php';
}
-
/**
 * Class fpdi_pdf_parser
 *
 * Page-oriented extension of pdf_parser. On construction the page tree
 * referenced by the /Pages entry of the root object is flattened into a
 * zero-based list of page objects. The other methods read resources, content
 * streams, boundary boxes and the rotation value from the pages in that list.
 */
class fpdi_pdf_parser extends pdf_parser
{
    /**
     * Pages
     *
     * Flat list of the page objects collected by _readPages().
     * Index begins at 0
     *
     * @var array
     */
    protected $_pages = array();

    /**
     * Page count
     *
     * Number of entries in $_pages; set once by the constructor.
     *
     * @var integer
     */
    protected $_pageCount = 0;

    /**
     * Current page number
     *
     * Stored as a zero-based index into $_pages. It is assigned by
     * setPageNo(), which accepts a one-based page number.
     *
     * @var integer
     */
    public $pageNo;

    /**
     * PDF version of imported document
     *
     * Not assigned anywhere in this class.
     *
     * @var string
     */
    public $_pdfVersion;

    /**
     * Available BoxTypes
     *
     * Dictionary keys that _getPageBoxes() looks up for every page.
     *
     * @var array
     */
    public $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');

    /**
     * The constructor.
     *
     * Runs the parent constructor, then reads the complete page tree and
     * stores the resulting page list and page count.
     *
     * @param string $filename The source filename
     * @throws Exception If a node of the page tree has no usable /Kids entry
     */
    public function __construct($filename)
    {
        parent::__construct($filename);

        // Resolve the /Pages dictionary referenced by the root object
        $pages = $this->resolveObject($this->_root[1][1]['/Pages']);

        // Start from an empty list: if the page tree yields no pages at all
        // the list must still be an array, otherwise count() below would
        // fail on a null value.
        $this->_pages = array();
        $this->_readPages($pages, $this->_pages);

        $this->_pageCount = count($this->_pages);
    }

    /**
     * Get page count from source file.
     *
     * @return int
     */
    public function getPageCount()
    {
        return $this->_pageCount;
    }

    /**
     * Set the page number.
     *
     * The given value is cast to int and converted to the zero-based index
     * that is stored in $pageNo.
     *
     * @param int $pageNo Page number to use (first page is 1)
     * @throws InvalidArgumentException If the number is outside 1..getPageCount()
     */
    public function setPageNo($pageNo)
    {
        $pageNo = ((int) $pageNo) - 1;

        if ($pageNo < 0 || $pageNo >= $this->getPageCount()) {
            throw new InvalidArgumentException('Invalid page number!');
        }

        $this->pageNo = $pageNo;
    }

    /**
     * Get page-resources from current page
     *
     * The current page is the one selected through setPageNo().
     *
     * @return array|boolean The resources, or false if neither the page nor
     *                       any of its ancestors has a /Resources entry
     */
    public function getPageResources()
    {
        return $this->_getPageResources($this->_pages[$this->pageNo]);
    }

    /**
     * Get page-resources from a /Page dictionary.
     *
     * If the object has no /Resources entry of its own, the lookup continues
     * with the object referenced by /Parent.
     *
     * @param array $obj Array of pdf-data
     * @return array|boolean The resolved /Resources value (unwrapped if it is
     *                       a TYPE_OBJECT), or false if no entry was found
     */
    protected function _getPageResources($obj)
    {
        $obj = $this->resolveObject($obj);

        if (isset($obj[1][1]['/Resources'])) {
            $res = $this->resolveObject($obj[1][1]['/Resources']);
        } else if (isset($obj[1][1]['/Parent'])) {
            $res = $this->_getPageResources($obj[1][1]['/Parent']);
        } else {
            return false;
        }

        // The recursive call yields false when no ancestor has the entry, so
        // the type tag may only be inspected on an array.
        if (is_array($res) && $res[0] == pdf_parser::TYPE_OBJECT) {
            return $res[1];
        }

        return $res;
    }

    /**
     * Get content of current page.
     *
     * If /Contents is an array, the streams are concatenated. Every stream
     * is passed through _unFilterStream() and followed by a single space.
     *
     * @return string Empty string if the page has no /Contents entry
     */
    public function getContent()
    {
        $buffer = '';

        if (isset($this->_pages[$this->pageNo][1][1]['/Contents'])) {
            $contents = $this->_getPageContent($this->_pages[$this->pageNo][1][1]['/Contents']);
            foreach ($contents as $tmpContent) {
                $buffer .= $this->_unFilterStream($tmpContent) . ' ';
            }
        }

        return $buffer;
    }

    /**
     * Resolve all content objects.
     *
     * Accepts an object reference or an array of such values and returns
     * the resolved content objects as a flat list. Nested arrays are
     * walked recursively. Values of any other type yield an empty list.
     *
     * @param array $contentRef
     * @return array
     */
    protected function _getPageContent($contentRef)
    {
        $contents = array();

        if ($contentRef[0] == pdf_parser::TYPE_OBJREF) {
            $content = $this->resolveObject($contentRef);
            // The referenced object may itself hold an array of further
            // content references instead of a single stream.
            if (isset($content[1][0]) && $content[1][0] == pdf_parser::TYPE_ARRAY) {
                $contents = $this->_getPageContent($content[1]);
            } else {
                $contents[] = $content;
            }
        } else if ($contentRef[0] == pdf_parser::TYPE_ARRAY) {
            foreach ($contentRef[1] as $tmp_content_ref) {
                $contents = array_merge($contents, $this->_getPageContent($tmp_content_ref));
            }
        }

        return $contents;
    }

    /**
     * Get a boundary box from a page
     *
     * Array format is same as used by FPDF_TPL: the keys are x, y, w, h,
     * llx, lly, urx and ury, all divided by $k. If the page has no such
     * box, the lookup continues with the object referenced by /Parent.
     *
     * @param array $page a /Page dictionary
     * @param string $boxIndex Type of box {see {@link $availableBoxes})
     * @param float $k Scale factor from user space units to points
     *
     * @return array|boolean The box, or false if neither the page nor any
     *                       of its ancestors defines it
     */
    protected function _getPageBox($page, $boxIndex, $k)
    {
        $page = $this->resolveObject($page);

        $box = null;
        if (isset($page[1][1][$boxIndex])) {
            $box = $page[1][1][$boxIndex];
        }

        // The box may be stored indirectly; use the value of the referenced
        // object. An unresolvable reference is treated like a missing box.
        if (is_array($box) && isset($box[0]) && $box[0] == pdf_parser::TYPE_OBJREF) {
            $tmp_box = $this->resolveObject($box);
            $box = (is_array($tmp_box) && isset($tmp_box[1])) ? $tmp_box[1] : null;
        }

        if (is_array($box) && isset($box[0]) && $box[0] == pdf_parser::TYPE_ARRAY) {
            $b = $box[1];
            $x0 = $b[0][1];
            $y0 = $b[1][1];
            $x1 = $b[2][1];
            $y1 = $b[3][1];

            return array(
                'x' => $x0 / $k,
                'y' => $y0 / $k,
                'w' => abs($x0 - $x1) / $k,
                'h' => abs($y0 - $y1) / $k,
                'llx' => min($x0, $x1) / $k,
                'lly' => min($y0, $y1) / $k,
                'urx' => max($x0, $x1) / $k,
                'ury' => max($y0, $y1) / $k,
            );
        }

        if (!isset($page[1][1]['/Parent'])) {
            return false;
        }

        return $this->_getPageBox($this->resolveObject($page[1][1]['/Parent']), $boxIndex, $k);
    }

    /**
     * Get all page boundary boxes by page number
     *
     * @param int $pageNo The page number (first page is 1)
     * @param float $k Scale factor from user space units to points
     * @return array
     * @throws InvalidArgumentException If the page does not exist
     */
    public function getPageBoxes($pageNo, $k)
    {
        if (!isset($this->_pages[$pageNo - 1])) {
            throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
        }

        return $this->_getPageBoxes($this->_pages[$pageNo - 1], $k);
    }

    /**
     * Get all boxes from /Page dictionary
     *
     * Looks up every entry of $availableBoxes. Boxes that cannot be found
     * are left out of the result.
     *
     * @param array $page A /Page dictionary
     * @param float $k Scale factor from user space units to points
     * @return array Boxes keyed by their name, e.g. '/MediaBox'
     */
    protected function _getPageBoxes($page, $k)
    {
        $boxes = array();

        foreach ($this->availableBoxes as $boxName) {
            $pageBox = $this->_getPageBox($page, $boxName, $k);
            if ($pageBox) {
                $boxes[$boxName] = $pageBox;
            }
        }

        return $boxes;
    }

    /**
     * Get the page rotation by page number
     *
     * @param integer $pageNo The page number (first page is 1)
     * @throws InvalidArgumentException If the page does not exist
     * @return array|bool The resolved /Rotate value, or false if neither the
     *                    page nor any of its ancestors has a /Rotate entry
     */
    public function getPageRotation($pageNo)
    {
        if (!isset($this->_pages[$pageNo - 1])) {
            throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
        }

        return $this->_getPageRotation($this->_pages[$pageNo - 1]);
    }

    /**
     * Get the rotation value of a page
     *
     * If the object has no /Rotate entry of its own, the lookup continues
     * with the object referenced by /Parent.
     *
     * @param array $obj A /Page dictionary
     * @return array|bool The resolved /Rotate value (unwrapped if it is a
     *                    TYPE_OBJECT), or false if no entry was found
     */
    protected function _getPageRotation($obj)
    {
        $obj = $this->resolveObject($obj);

        if (isset($obj[1][1]['/Rotate'])) {
            $res = $this->resolveObject($obj[1][1]['/Rotate']);
        } else if (isset($obj[1][1]['/Parent'])) {
            $res = $this->_getPageRotation($obj[1][1]['/Parent']);
        } else {
            return false;
        }

        // The recursive call yields false when no ancestor has the entry, so
        // the type tag may only be inspected on an array.
        if (is_array($res) && $res[0] == pdf_parser::TYPE_OBJECT) {
            return $res[1];
        }

        return $res;
    }

    /**
     * Read all pages
     *
     * Walks the /Kids of the given node. Kids whose /Type is /Pages are
     * descended into recursively; every other kid is appended to $result.
     *
     * @param array $pages /Pages dictionary
     * @param array $result The result array
     * @throws Exception If the node has no usable /Kids entry
     */
    protected function _readPages(&$pages, &$result)
    {
        // Get the kids dictionary; a missing entry is reported through the
        // same exception as an unresolvable one.
        $_kids = false;
        if (isset($pages[1][1]['/Kids'])) {
            $_kids = $this->resolveObject($pages[1][1]['/Kids']);
        }

        if (!is_array($_kids)) {
            throw new Exception('Cannot find /Kids in current /Page-Dictionary');
        }

        // /Kids may be stored indirectly; unwrap the object to its value
        if ($_kids[0] === self::TYPE_OBJECT) {
            $_kids = $_kids[1];
        }

        $kids = $_kids[1];

        foreach ($kids as $v) {
            $pg = $this->resolveObject($v);
            if (isset($pg[1][1]['/Type'][1]) && $pg[1][1]['/Type'][1] === '/Pages') {
                // If one of the kids is an embedded
                // /Pages array, resolve it as well.
                $this->_readPages($pg, $result);
            } else {
                $result[] = $pg;
            }
        }
    }
}
|