Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

fpdi_pdf_parser.php 8.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. <?php
  2. /**
  3. * This file is part of FPDI
  4. *
  5. * @package FPDI
  6. * @copyright Copyright (c) 2015 Setasign - Jan Slabon (http://www.setasign.com)
  7. * @license http://opensource.org/licenses/mit-license The MIT License
  8. * @version 1.6.1
  9. */
  10. if (!class_exists('pdf_parser')) {
  11. require_once('pdf_parser.php');
  12. }
  /**
   * Class fpdi_pdf_parser
   *
   * Extends pdf_parser with page-level access to an imported document: the
   * page tree is flattened once in the constructor, after which resources,
   * content streams, boundary boxes and rotation can be read per page.
   */
  class fpdi_pdf_parser extends pdf_parser
  {
      /**
       * Pages
       *
       * Flat list of resolved page objects collected by {@link _readPages()}.
       * Index begins at 0.
       *
       * Initialised to an empty array so that a page tree without any leaf
       * pages yields a page count of 0 instead of calling count() on null.
       *
       * @var array
       */
      protected $_pages = array();

      /**
       * Page count
       *
       * @var integer
       */
      protected $_pageCount = 0;

      /**
       * Current page number
       *
       * Stored as a zero-based index into {@link $_pages} (see
       * {@link setPageNo()}, which converts from a one-based page number).
       *
       * @var integer
       */
      public $pageNo;

      /**
       * PDF version of imported document
       *
       * Not assigned anywhere in this class.
       *
       * @var string
       */
      public $_pdfVersion;

      /**
       * Available BoxTypes
       *
       * @var array
       */
      public $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');

      /**
       * The constructor.
       *
       * Runs the parent constructor, resolves the /Pages entry of the document
       * root and flattens the page tree into {@link $_pages}.
       *
       * @param string $filename The source filename
       */
      public function __construct($filename)
      {
          parent::__construct($filename);

          // Resolve the /Pages dictionary referenced by the document root.
          $pages = $this->resolveObject($this->_root[1][1]['/Pages']);

          // Collect all leaf pages, then cache how many were found.
          $this->_readPages($pages, $this->_pages);
          $this->_pageCount = count($this->_pages);
      }

      /**
       * Get page count from source file.
       *
       * @return int
       */
      public function getPageCount()
      {
          return $this->_pageCount;
      }

      /**
       * Set the page number.
       *
       * @param int $pageNo Page number to use (one-based)
       * @throws InvalidArgumentException If the page number is out of range
       */
      public function setPageNo($pageNo)
      {
          // Callers pass a one-based number; internally a zero-based index is kept.
          $pageNo = ((int) $pageNo) - 1;

          if ($pageNo < 0 || $pageNo >= $this->getPageCount()) {
              throw new InvalidArgumentException('Invalid page number!');
          }

          $this->pageNo = $pageNo;
      }

      /**
       * Get page-resources from current page
       *
       * @return array|boolean The resources value, or false if neither the
       *                       page nor any of its parents defines /Resources
       */
      public function getPageResources()
      {
          return $this->_getPageResources($this->_pages[$this->pageNo]);
      }

      /**
       * Get page-resources from a /Page dictionary.
       *
       * If the object has no /Resources entry of its own, the lookup continues
       * with its /Parent object.
       *
       * @param array $obj Array of pdf-data
       * @return array|boolean The resources value, or false if none was found
       */
      protected function _getPageResources($obj)
      {
          $obj = $this->resolveObject($obj);

          if (isset($obj[1][1]['/Resources'])) {
              $res = $this->resolveObject($obj[1][1]['/Resources']);
          } else if (isset($obj[1][1]['/Parent'])) {
              $res = $this->_getPageResources($obj[1][1]['/Parent']);
          } else {
              return false;
          }

          // The parent lookup may have returned false; only inspect the type
          // marker when there is actually an array to look into.
          if (is_array($res) && $res[0] == self::TYPE_OBJECT) {
              return $res[1];
          }

          return $res;
      }

      /**
       * Get content of current page.
       *
       * If /Contents is an array, the streams are concatenated. Each stream is
       * passed through _unFilterStream() (not defined in this class) and
       * followed by a single space.
       *
       * @return string Empty string if the page has no /Contents entry
       */
      public function getContent()
      {
          $buffer = '';

          if (isset($this->_pages[$this->pageNo][1][1]['/Contents'])) {
              $contents = $this->_getPageContent($this->_pages[$this->pageNo][1][1]['/Contents']);
              foreach ($contents as $tmpContent) {
                  $buffer .= $this->_unFilterStream($tmpContent) . ' ';
              }
          }

          return $buffer;
      }

      /**
       * Resolve all content objects.
       *
       * Accepts an object reference or an array of them and returns the
       * resolved content objects as a flat list, in document order.
       *
       * @param array $contentRef
       * @return array
       */
      protected function _getPageContent($contentRef)
      {
          $contents = array();

          if ($contentRef[0] == self::TYPE_OBJREF) {
              $content = $this->resolveObject($contentRef);
              if ($content[1][0] == self::TYPE_ARRAY) {
                  // The referenced object is itself an array of content references.
                  $contents = $this->_getPageContent($content[1]);
              } else {
                  $contents[] = $content;
              }
          } else if ($contentRef[0] == self::TYPE_ARRAY) {
              foreach ($contentRef[1] as $tmp_content_ref) {
                  $contents = array_merge($contents, $this->_getPageContent($tmp_content_ref));
              }
          }

          return $contents;
      }

      /**
       * Get a boundary box from a page
       *
       * Array format is same as used by FPDF_TPL. All values are divided by $k.
       * If the page does not define the box, the lookup continues with its
       * /Parent object.
       *
       * @param array $page a /Page dictionary
       * @param string $boxIndex Type of box {see {@link $availableBoxes})
       * @param float $k Scale factor from user space units to points
       *
       * @return array|boolean Array with the keys x, y, w, h, llx, lly, urx and
       *                       ury, or false if the box is not defined
       */
      protected function _getPageBox($page, $boxIndex, $k)
      {
          $page = $this->resolveObject($page);
          $box = isset($page[1][1][$boxIndex]) ? $page[1][1][$boxIndex] : null;

          // The box may be stored indirectly; follow the reference once.
          if ($box !== null && $box[0] == self::TYPE_OBJREF) {
              $tmp_box = $this->resolveObject($box);
              $box = $tmp_box[1];
          }

          if ($box !== null && $box[0] == self::TYPE_ARRAY) {
              $b = $box[1];

              return array(
                  'x' => $b[0][1] / $k,
                  'y' => $b[1][1] / $k,
                  'w' => abs($b[0][1] - $b[2][1]) / $k,
                  'h' => abs($b[1][1] - $b[3][1]) / $k,
                  // min()/max() normalise the corners regardless of the order
                  // in which the two points were written.
                  'llx' => min($b[0][1], $b[2][1]) / $k,
                  'lly' => min($b[1][1], $b[3][1]) / $k,
                  'urx' => max($b[0][1], $b[2][1]) / $k,
                  'ury' => max($b[1][1], $b[3][1]) / $k,
              );
          }

          if (!isset($page[1][1]['/Parent'])) {
              return false;
          }

          return $this->_getPageBox($this->resolveObject($page[1][1]['/Parent']), $boxIndex, $k);
      }

      /**
       * Get all page boundary boxes by page number
       *
       * @param int $pageNo The page number (one-based)
       * @param float $k Scale factor from user space units to points
       * @return array Boxes keyed by box name, see {@link _getPageBoxes()}
       * @throws InvalidArgumentException If the page does not exist
       */
      public function getPageBoxes($pageNo, $k)
      {
          if (!isset($this->_pages[$pageNo - 1])) {
              throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
          }

          return $this->_getPageBoxes($this->_pages[$pageNo - 1], $k);
      }

      /**
       * Get all boxes from /Page dictionary
       *
       * Only boxes that could be found are included in the result.
       *
       * @param array $page A /Page dictionary
       * @param float $k Scale factor from user space units to points
       * @return array Map of box name (entries of {@link $availableBoxes}) to
       *               the array returned by {@link _getPageBox()}
       */
      protected function _getPageBoxes($page, $k)
      {
          $boxes = array();

          foreach ($this->availableBoxes as $box) {
              $pageBox = $this->_getPageBox($page, $box, $k);
              if ($pageBox) {
                  $boxes[$box] = $pageBox;
              }
          }

          return $boxes;
      }

      /**
       * Get the page rotation by page number
       *
       * @param integer $pageNo The page number (one-based)
       * @throws InvalidArgumentException If the page does not exist
       * @return array|bool The /Rotate value, or false if neither the page nor
       *                    any of its parents defines one
       */
      public function getPageRotation($pageNo)
      {
          if (!isset($this->_pages[$pageNo - 1])) {
              throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
          }

          return $this->_getPageRotation($this->_pages[$pageNo - 1]);
      }

      /**
       * Get the rotation value of a page
       *
       * If the object has no /Rotate entry of its own, the lookup continues
       * with its /Parent object.
       *
       * @param array $obj A /Page dictionary
       * @return array|bool The /Rotate value, or false if none was found
       */
      protected function _getPageRotation($obj)
      {
          $obj = $this->resolveObject($obj);

          if (isset($obj[1][1]['/Rotate'])) {
              $res = $this->resolveObject($obj[1][1]['/Rotate']);
          } else if (isset($obj[1][1]['/Parent'])) {
              $res = $this->_getPageRotation($obj[1][1]['/Parent']);
          } else {
              return false;
          }

          // The parent lookup may have returned false; only inspect the type
          // marker when there is actually an array to look into.
          if (is_array($res) && $res[0] == self::TYPE_OBJECT) {
              return $res[1];
          }

          return $res;
      }

      /**
       * Read all pages
       *
       * Walks the /Kids of the given /Pages dictionary depth-first and appends
       * every kid that is not itself a /Pages node to $result.
       *
       * @param array $pages /Pages dictionary
       * @param array $result The result array
       * @throws Exception If the /Kids entry is missing or cannot be resolved
       */
      protected function _readPages(&$pages, &$result)
      {
          // Fail with the intended error instead of an undefined-index notice
          // when the dictionary has no /Kids entry at all.
          if (!isset($pages[1][1]['/Kids'])) {
              throw new Exception('Cannot find /Kids in current /Page-Dictionary');
          }

          $_kids = $this->resolveObject($pages[1][1]['/Kids']);
          if (!is_array($_kids)) {
              throw new Exception('Cannot find /Kids in current /Page-Dictionary');
          }

          // An indirectly stored /Kids array arrives wrapped in an object.
          if ($_kids[0] === self::TYPE_OBJECT) {
              $_kids = $_kids[1];
          }

          $kids = $_kids[1];

          foreach ($kids as $v) {
              $pg = $this->resolveObject($v);

              if (isset($pg[1][1]['/Type'][1]) && $pg[1][1]['/Type'][1] === '/Pages') {
                  // If one of the kids is an embedded
                  // /Pages array, resolve it as well.
                  $this->_readPages($pg, $result);
              } else {
                  $result[] = $pg;
              }
          }
      }
  }