<?php
//
//  FPDI - Version 1.1
//
//    Copyright 2004,2005 Setasign - Jan Slabon
//
//  Licensed under the Apache License, Version 2.0 (the "License");
//  you may not use this file except in compliance with the License.
//  You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
//

// Type tags used in the first slot of every parsed PDF value.
// Each constant is only defined when no other file has defined it yet,
// so this file can be loaded together with other parser files that
// declare the same set.
defined('PDF_TYPE_NULL')       || define('PDF_TYPE_NULL', 0);
defined('PDF_TYPE_NUMERIC')    || define('PDF_TYPE_NUMERIC', 1);
defined('PDF_TYPE_TOKEN')      || define('PDF_TYPE_TOKEN', 2);
defined('PDF_TYPE_HEX')        || define('PDF_TYPE_HEX', 3);
defined('PDF_TYPE_STRING')     || define('PDF_TYPE_STRING', 4);
defined('PDF_TYPE_DICTIONARY') || define('PDF_TYPE_DICTIONARY', 5);
defined('PDF_TYPE_ARRAY')      || define('PDF_TYPE_ARRAY', 6);
defined('PDF_TYPE_OBJDEC')     || define('PDF_TYPE_OBJDEC', 7);
defined('PDF_TYPE_OBJREF')     || define('PDF_TYPE_OBJREF', 8);
defined('PDF_TYPE_OBJECT')     || define('PDF_TYPE_OBJECT', 9);
defined('PDF_TYPE_STREAM')     || define('PDF_TYPE_STREAM', 10);


require_once("wrapper_functions.php");
require_once("pdf_parser.php");

/**
 * Parser used by FPDI to read pages, resources, boxes and document
 * information out of an existing PDF file.
 *
 * Parsed PDF values are plain arrays whose slot 0 holds one of the
 * PDF_TYPE_* constants and whose slot 1 holds the payload. Low-level
 * parsing (pdf_resolve_object(), the context $this->c, $this->xref,
 * $this->root) is inherited from pdf_parser.
 */
class fpdi_pdf_parser extends pdf_parser {

    /**
     * Pages of the imported document, in reading order.
     * Index begins at 0.
     *
     * @var array
     */
    var $pages;

    /**
     * Page count
     * @var integer
     */
    var $page_count;

    /**
     * Current page number (zero based, see setPageno())
     * @var integer
     */
    var $pageno;

    /**
     * PDF Version of imported Document
     * @var string
     */
    var $pdfVersion;

    /**
     * FPDI Reference
     * @var object
     */
    var $fpdi;

    /**
     * Document information collected by getInfo(), keyed by the entry
     * name without the leading slash (e.g. "Title").
     * Declared explicitly so that getInfo() does not create a dynamic property.
     *
     * @var array
     */
    var $infos = array();

    /**
     * Constructor
     *
     * Parses the file, collects the document information and builds the
     * flat page list.
     *
     * @param string $filename  Source-Filename
     * @param object $fpdi      Object of type fpdi
     */
    function __construct($filename,&$fpdi) {
        $this->fpdi =& $fpdi;
        $this->filename = $filename;

        parent::__construct($filename);

        // Get Info
        $this->getInfo();

        // resolve Pages-Dictionary
        $pages = $this->pdf_resolve_object($this->c, $this->root[1][1]['/Pages']);

        // Start from an empty list so that count() below always receives
        // an array, even when read_pages() does not find a single page.
        $this->pages = array();
        $this->read_pages($this->c, $pages, $this->pages);

        // count pages;
        $this->page_count = count($this->pages);
    }

    /**
     * Overwrite parent::error()
     *
     * Forwards the message to the error handler of the FPDI object.
     *
     * @param string $msg  Error-Message
     */
    function error($msg) {
        $this->fpdi->error($msg);
    }

    /**
     * Get pagecount from sourcefile
     *
     * @return int
     */
    function getPageCount() {
        return $this->page_count;
    }


    /**
     * Set pageno
     *
     * The page number is given one based and stored zero based.
     * An out-of-range number is reported through the FPDI error handler
     * and is not stored.
     *
     * @param int $pageno Pagenumber to use
     */
    function setPageno($pageno) {
        $pageno -= 1;

        if ($pageno < 0 || $pageno >= $this->getPageCount()) {
            $this->fpdi->error("Pagenumber is wrong!");
            // Only reached when the error handler returns: keep the
            // previously selected page instead of storing an invalid index.
            return;
        }

        $this->pageno = $pageno;
    }

    /**
     * Get page-resources from current page
     *
     * @return array|false see _getPageResources()
     */
    function getPageResources() {
        return $this->_getPageResources($this->pages[$this->pageno]);
    }

    /**
     * Get page-resources from /Page
     *
     * Uses the /Resources entry of the given object when there is one,
     * otherwise walks up the /Parent chain.
     *
     * @param array $obj Array of pdf-data (a /Page or /Pages object or a reference to it)
     * @return array|false the resources value, or false when neither the
     *                     object nor any of its parents has /Resources
     */
    function _getPageResources ($obj) { // $obj = /Page
        $obj = $this->pdf_resolve_object($this->c, $obj);

        if (isset($obj[1][1]['/Resources'])) {
            $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Resources']);
        } else if (isset($obj[1][1]['/Parent'])) {
            $res = $this->_getPageResources($obj[1][1]['/Parent']);
        } else {
            return false;
        }

        // An indirect object wraps the actual value in slot 1.
        // $res may be false here (nothing found further up), so it must
        // not be indexed blindly.
        if (is_array($res) && $res[0] == PDF_TYPE_OBJECT) {
            return $res[1];
        }
        return $res;
    }


    /**
     * Read the document information dictionary
     *
     * Collects the string valued standard entries of the trailer's /Info
     * dictionary into $this->infos. The trailer does not have to carry an
     * /Info entry; in that case the result is an empty array.
     *
     * @return array the collected entries (same value as $this->infos)
     */
    function getInfo() {
        $avail_infos = array("Title", "Author", "Subject", "Keywords", "Creator", "Producer", "CreationDate", "ModDate", "Trapped");
        $infos = array();

        if (isset($this->xref['trailer'][1]['/Info'])) {
            $_infos = $this->pdf_resolve_object($this->c, $this->xref['trailer'][1]['/Info']);

            foreach ($avail_infos as $info) {
                $key = "/".$info;
                if (!isset($_infos[1][1][$key])) {
                    continue;
                }

                $entry = $_infos[1][1][$key];
                if ($entry[0] == PDF_TYPE_STRING) {
                    $infos[$info] = $this->deescapeString($entry[1]);
                } else if ($entry[0] == PDF_TYPE_HEX) {
                    $infos[$info] = $this->hex2String($entry[1]);
                }
                // entries of any other type are skipped
            }
        }

        $this->infos = $infos;
        return $infos;
    }

    /**
     * Rebuilds a hexstring to string
     *
     * Without a byte order mark every two digits become one byte.
     * With a UTF-16 byte order mark (FEFF or FFFE, any letter case) every
     * four digits form one code unit: units whose high byte is 00 become
     * that single byte, all other units become "?".
     * Missing trailing digits are treated as 0.
     *
     * @param string $hex hexstring
     * @return string
     */
    function hex2String($hex) {
        $endian = false;
        $i = 0;

        if (preg_match("/^FEFF/i", $hex)) { // is utf-16 aka big endian
            $endian = "big";
            $i = 4;
        } else if (preg_match("/^FFFE/i", $hex)) { // is utf-16 aka little endian
            $endian = "little";
            $i = 4;
        }

        $s = "";
        $l = strlen($hex);

        if (!$endian) {
            for (; $i < $l; $i += 2) {
                $s .= chr(hexdec(str_pad(substr($hex, $i, 2), 2, "0")));
            }
            return $s;
        }

        for (; $i < $l; $i += 4) {
            // pad a truncated last unit instead of reading past the end
            $unit = str_pad(substr($hex, $i, 4), 4, "0");

            if ($endian == "big") {
                $high = substr($unit, 0, 2);
                $low  = substr($unit, 2, 2);
            } else {
                $low  = substr($unit, 0, 2);
                $high = substr($unit, 2, 2);
            }

            $s .= ($high !== "00") ? "?" : chr(hexdec($low));
        }

        return $s;
    }

    /**
     * Resolve escape sequences of a literal string
     *
     * Handles octal character codes (backslash followed by one to three
     * octal digits) and escaped parentheses. Other escape sequences are
     * left untouched.
     *
     * @param string $s raw content of a literal string
     * @return string
     */
    function deescapeString($s) {
        $ret = preg_replace_callback(
            '/\\\\([0-7]{1,3})/',
            function ($treffer) {
                // $treffer[1] holds the digits only; $treffer[0] would
                // include the backslash, which octdec() does not accept
                return chr(octdec($treffer[1]));
            },
            $s
        );

        return str_replace(array('\\(', '\\)'), array('(', ')'), $ret);
    }



    /**
     * Get content of current page
     *
     * If /Contents is an array, the streams are concatenated. A blank is
     * put between two streams so that the last token of one stream and the
     * first token of the next one cannot run together.
     * A page without /Contents yields an empty string.
     *
     * @return string
     */
    function getContent() {
        $page = $this->pages[$this->pageno];

        if (!isset($page[1][1]['/Contents'])) {
            return "";
        }

        $parts = array();
        foreach ($this->getPageContent($page[1][1]['/Contents']) as $tmp_content) {
            $parts[] = $this->rebuildContentStream($tmp_content);
        }

        return implode(" ", $parts);
    }


    /**
     * Resolve all content-objects
     *
     * Follows references and nested arrays and returns the resolved
     * objects as a flat list.
     *
     * @param array $content_ref
     * @return array
     */
    function getPageContent($content_ref) {
        $contents = array();

        if ($content_ref[0] == PDF_TYPE_OBJREF) {
            $content = $this->pdf_resolve_object($this->c, $content_ref);
            if ($content[1][0] == PDF_TYPE_ARRAY) {
                // the referenced object is itself an array of streams
                $contents = $this->getPageContent($content[1]);
            } else {
                $contents[] = $content;
            }
        } else if ($content_ref[0] == PDF_TYPE_ARRAY) {
            foreach ($content_ref[1] as $tmp_content_ref) {
                $contents = array_merge($contents, $this->getPageContent($tmp_content_ref));
            }
        }

        return $contents;
    }


    /**
     * Rebuild content-streams
     *
     * Applies the filters named in the /Filter entry of the object, in
     * order. Handled here: /FlateDecode (needs zlib), /LZWDecode and
     * /ASCII85Decode (both need the matching file in decoders/). Any other
     * filter is reported through the FPDI error handler.
     *
     * @param array $obj resolved object carrying a stream
     * @return string
     */
    function rebuildContentStream($obj) {
        $filters = array();

        if (isset($obj[1][1]['/Filter'])) {
            $_filter = $obj[1][1]['/Filter'];

            // the filter entry may be stored as an indirect object
            if ($_filter[0] == PDF_TYPE_OBJREF) {
                $tmp_filter = $this->pdf_resolve_object($this->c, $_filter);
                $_filter = $tmp_filter[1];
            }

            if ($_filter[0] == PDF_TYPE_TOKEN) {
                $filters[] = $_filter;
            } else if ($_filter[0] == PDF_TYPE_ARRAY) {
                $filters = $_filter[1];
            }
        }

        $stream = isset($obj[2][1]) ? $obj[2][1] : "";

        foreach ($filters as $_filter) {
            switch ($_filter[1]) {
                case "/FlateDecode":
                    if (function_exists('gzuncompress')) {
                        // gzuncompress() fails on empty input, but an empty
                        // stream is not an error
                        $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : "";
                    } else {
                        $this->fpdi->error(sprintf("To handle %s filter, please compile php with zlib support.",$_filter[1]));
                    }
                    if ($stream === false) {
                        $this->fpdi->error("Error while decompressing string.");
                    }
                break;
                case "/LZWDecode":
                    @include_once("decoders/lzw.php");
                    if (class_exists("LZWDecode")) {
                        $lzwdec = new LZWDecode($this->fpdi);
                        $stream = $lzwdec->decode($stream);
                    } else {
                        $this->fpdi->error(sprintf("Unsupported Filter: %s",$_filter[1]));
                    }
                break;
                case "/ASCII85Decode":
                    @include_once("decoders/ascii85.php");
                    if (class_exists("ASCII85Decode")) {
                        $ascii85 = new ASCII85Decode($this->fpdi);
                        $stream = $ascii85->decode(trim($stream));
                    } else {
                        $this->fpdi->error(sprintf("Unsupported Filter: %s",$_filter[1]));
                    }
                break;
                case null:
                    // no filter name: the data is used as it is
                break;
                default:
                    $this->fpdi->error(sprintf("Unsupported Filter: %s",$_filter[1]));
            }
        }

        return $stream;
    }

    /**
     * Get MediaBox
     *
     * gets an array that describes the size of a page.
     *
     * @param integer $pageno one based page number
     * @return array|false @see getPageBox()
     */
    function getPageMediaBox($pageno) {
        if (!isset($this->pages[$pageno-1])) {
            $this->fpdi->error("Pagenumber is wrong!");
            return false;
        }

        return $this->getPageBox($this->pages[$pageno-1],"/MediaBox");
    }


    /**
     * Get a Box from a page
     * Arrayformat is same as used by fpdf_tpl
     *
     * When the page does not carry the box itself, the /Parent chain is
     * searched. All values are divided by the scale factor of the FPDI
     * object ($fpdi->k).
     *
     * NOTE(review): "w" and "h" are the third and fourth number of the box
     * as stored in the file. They equal the width/height only when the
     * first two numbers are 0 - confirm how the caller uses them before
     * changing this.
     *
     * @param array $page a /Page
     * @param string $box_index Type of Box @see getPageBoxes()
     * @return array|false array with the keys x, y, w, h or false if the
     *                     box is defined neither on the page nor on a parent
     */
    function getPageBox($page, $box_index) {
        $page = $this->pdf_resolve_object($this->c,$page);

        $box = null;
        if (isset($page[1][1][$box_index])) {
            $box = $page[1][1][$box_index];
        }

        if (!is_null($box) && $box[0] == PDF_TYPE_OBJREF) {
            $tmp_box = $this->pdf_resolve_object($this->c,$box);
            $box = $tmp_box[1];
        }

        if (!is_null($box) && $box[0] == PDF_TYPE_ARRAY) {
            $b = $box[1];
            $k = $this->fpdi->k;
            return array("x" => $b[0][1]/$k,
                         "y" => $b[1][1]/$k,
                         "w" => $b[2][1]/$k,
                         "h" => $b[3][1]/$k);
        }

        if (!isset($page[1][1]['/Parent'])) {
            return false;
        }

        return $this->getPageBox($this->pdf_resolve_object($this->c, $page[1][1]['/Parent']), $box_index);
    }

    /**
     * Get all Boxes from /Page
     *
     * @param array a /Page
     * @return array found boxes keyed by their name (e.g. "/MediaBox");
     *               boxes that cannot be found are left out
     */
    function getPageBoxes($page) {
        $_boxes = array("/MediaBox","/CropBox","/BleedBox","/TrimBox","/ArtBox");
        $boxes = array();

        foreach ($_boxes as $box) {
            $_box = $this->getPageBox($page,$box);
            if ($_box) {
                $boxes[$box] = $_box;
            }
        }

        return $boxes;
    }


    /**
     * Read all /Page(es)
     *
     * Walks the page tree below the given /Pages node and appends every
     * leaf to the result array. Kids of type /Pages are descended into.
     *
     * @param object pdf_context
     * @param array /Pages
     * @param array the result-array
     */
    function read_pages (&$c, &$pages, &$result) {
        $kids = null;
        if (isset($pages[1][1]['/Kids'])) {
            // Get the kids array
            $kids = $this->pdf_resolve_object ($c, $pages[1][1]['/Kids']);
        }

        // /Kids stored as an indirect object: the array is in slot 1
        if (is_array($kids) && $kids[0] == PDF_TYPE_OBJECT) {
            $kids = $kids[1];
        }

        if (!is_array($kids) || !isset($kids[1]) || !is_array($kids[1])) {
            $this->fpdi->Error("Cannot find /Kids in current /Page-Dictionary");
            // nothing to iterate over if the error handler returns
            return;
        }

        foreach ($kids[1] as $v) {
            $pg = $this->pdf_resolve_object ($c, $v);

            if (isset($pg[1][1]['/Type'][1]) && $pg[1][1]['/Type'][1] === '/Pages') {
                // If one of the kids is an embedded
                // /Pages array, resolve it as well.
                $this->read_pages ($c, $pg, $result);
            } else {
                $result[] = $pg;
            }
        }
    }



    /**
     * Get PDF-Version
     *
     * And raise the import version kept by the FPDI object when this
     * document has a higher version.
     */
    function getPDFVersion() {
        parent::getPDFVersion();

        if (isset($this->fpdi->importVersion) && $this->pdfVersion > $this->fpdi->importVersion) {
            $this->fpdi->importVersion = $this->pdfVersion;
        }
    }

}

?>