U
    
3g5ˆ  ã                *   @  sÄ  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZmZmZmZmZ dddœdd„Zd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2œ)Zd3d4„ ZG d5d6„ d6eƒZd7dd8d9œd:d;„ZG d<d=„ d=eƒZG d>d?„ d?eƒZG d@dA„ dAeƒZG dBdC„ dCƒZG dDdE„ dEƒZG dFdG„ dGee ƒZerxejeeef ef ZnejZG dHdI„ dIeƒZ G dJdK„ dKƒZ!G dLdM„ dMƒZ"dNdO„ Z#G dPdQ„ dQƒZ$dS )Ré    )ÚannotationsN)ÚTYPE_CHECKINGÚAnyÚListÚ
NamedTupleÚUnionÚstrÚbytes)ÚsÚreturnc                 C  s   t j|  d¡ S )NÚ	utf_16_be)ÚcodecsÚBOM_UTF16_BEÚencode)r
   © r   ú1/tmp/pip-unpacked-wheel-tbvhwjp8/PIL/PdfParser.pyÚencode_text   s    r   úu   Ë˜u   Ë‡u   Ë†u   Ë™u   Ëu   Ë›u   Ëšu   Ëœu   â€¢u   â€ u   â€¡u   â€¦u   â€”u   â€“u   Æ’u   â„u   â€¹u   â€ºu   âˆ’u   â€°u   â€žu   â€œu   â€u   â€˜u   â€™u   â€šu   â„¢u   ï¬u   ï¬‚u   Åu   Å’u   Å u   Å¸u   Å½u   Ä±u   Å‚u   Å“u   Å¡u   Å¾u   â‚¬))é   é   é   é   é   é   é   é   é   é€   é   é‚   éƒ   é„   é…   é†   é‡   éˆ   é‰   éŠ   é‹   éŒ   é   éŽ   é   é   é‘   é’   é“   é”   é•   é–   é—   é˜   é™   éš   é›   éœ   é   éž   é    c                 C  sH   | d t tjƒ… tjkr0| t tjƒd …  d¡S d dd„ | D ƒ¡S d S )Nr   Ú c                 s  s   | ]}t  |t|ƒ¡V  qd S ©N)ÚPDFDocEncodingÚgetÚchr)Ú.0Úbyter   r   r   Ú	<genexpr>E   s     zdecode_text.<locals>.<genexpr>)Úlenr   r   ÚdecodeÚjoin)Úbr   r   r   Údecode_textA   s    rI   c                   @  s   e Zd ZdZdS )ÚPdfFormatErrorz\An error that probably indicates a syntactic or semantic error in the
    PDF file structureN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   rJ   H   s   rJ   ÚboolÚNone)Ú	conditionÚerror_messager   c                 C  s   | st |ƒ‚d S r>   )rJ   )rQ   rR   r   r   r   Úcheck_format_conditionO   s    rS   c                   @  s   e Zd ZU ded< ded< dS )ÚIndirectReferenceTupleÚintÚ	object_idÚ
generationN)rK   rL   rM   Ú__annotations__r   r   r   r   rT   T   s   
rT   c                   @  sN   e Zd Zddœdd„Zddœdd„Zdd	d
œdd„Zdd„ Zddœdd„ZdS )ÚIndirectReferencer   ©r   c                 C  s   | j › d| j› dS )Nú z R©rV   rW   ©Úselfr   r   r   Ú__str__Z   s    zIndirectReference.__str__r	   c                 C  s   |   ¡  d¡S ©Núus-ascii)r_   r   r]   r   r   r   Ú	__bytes__]   s    zIndirectReference.__bytes__ÚobjectrO   )Úotherr   c                 C  s6   | j |j k	rdS t|tƒst‚|j| jko4|j| jkS )NF)Ú	__class__Ú
isinstancerY   ÚAssertionErrorrV   rW   ©r^   rd   r   r   r   Ú__eq__`   s    zIndirectReference.__eq__c                 C  s
   | |k S r>   r   rh   r   r   r   Ú__ne__f   s    zIndirectReference.__ne__rU   c                 C  s   t | j| jfƒS r>   )ÚhashrV   rW   r]   r   r   r   Ú__hash__i   s    zIndirectReference.__hash__N)rK   rL   rM   r_   rb   ri   rj   rl   r   r   r   r   rY   Y   s
   rY   c                   @  s   e Zd Zddœdd„ZdS )ÚIndirectObjectDefr   rZ   c                 C  s   | j › d| j› dS )Nr[   z objr\   r]   r   r   r   r_   n   s    zIndirectObjectDef.__str__N)rK   rL   rM   r_   r   r   r   r   rm   m   s   rm   c                   @  sR   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zddœdd„Zdd„ Z	dd„ Z
dS )Ú	XrefTablec                 C  s    i | _ i | _ddi| _d| _d S )Nr   i   F)Úexisting_entriesÚnew_entriesÚdeleted_entriesÚreading_finishedr]   r   r   r   Ú__init__s   s    
zXrefTable.__init__c                 C  s2   | j r|| j|< n
|| j|< || jkr.| j|= d S r>   )rr   rp   ro   rq   ©r^   ÚkeyÚvaluer   r   r   Ú__setitem__y   s
    

zXrefTable.__setitem__c                 C  s0   z| j | W S  tk
r*   | j|  Y S X d S r>   )rp   ÚKeyErrorro   ©r^   ru   r   r   r   Ú__getitem__   s    zXrefTable.__getitem__c                 C  s†   || j kr0| j | d d }| j |= || j|< nR|| jkrX| j| d d }|| j|< n*|| jkrn| j| }nd|› d}t|ƒ‚d S )Né   z
object ID z+ cannot be deleted because it doesn't exist)rp   rq   ro   Ú
IndexError)r^   ru   rW   Úmsgr   r   r   Ú__delitem__‡   s    


zXrefTable.__delitem__c                 C  s   || j kp|| jkS r>   )ro   rp   ry   r   r   r   Ú__contains__•   s    zXrefTable.__contains__rU   rZ   c                 C  s.   t t| j ¡ ƒt| j ¡ ƒB t| j ¡ ƒB ƒS r>   )rE   Úsetro   Úkeysrp   rq   r]   r   r   r   Ú__len__˜   s    ÿþÿzXrefTable.__len__c                 C  s*   t | j ¡ ƒt | j ¡ ƒ t | j ¡ ƒB S r>   )r€   ro   r   rq   rp   r]   r   r   r   r   Ÿ   s    þzXrefTable.keysc              	   C  sL  t t| j ¡ ƒt| j ¡ ƒB ƒ}t t| j ¡ ƒƒ}| ¡ }| d¡ |rHd }t|ƒD ]>\}}|d ksr|d |krx|}qV|d |… }||d … } qžqV|}d }| d|d t|ƒf ¡ |D ]ˆ}	|	| jkrà| d| j|	  ¡ q¼| 	d¡}
t
|	|
kd|	› d|
› ƒ z|d }W n tk
r*   d}Y nX | d|| j|	 f ¡ q¼qD|S )	Ns   xref
r{   s   %d %d
r   s   %010d %05d n 
z*expected the next deleted object ID to be z, instead found s   %010d %05d f 
)Úsortedr€   rp   r   rq   ÚtellÚwriteÚ	enumeraterE   ÚpoprS   r|   )r^   Úfr   Zdeleted_keysZ	startxrefÚprevÚindexru   Zcontiguous_keysrV   Zthis_deleted_object_idZnext_in_linked_listr   r   r   r…   ¤   sB     


þ
ÿÿzXrefTable.writeN)rK   rL   rM   rs   rw   rz   r~   r   r‚   r   r…   r   r   r   r   rn   r   s   rn   c                   @  s|   e Zd Zdd„ Zddœdd„Zdd„ Zd	dœd
d„Zddœdd„Zedd„ ƒZ	e
eddƒƒdd„ dD ƒ Zddœdd„ZdS )ÚPdfNamec                 C  s6   t |tƒr|j| _nt |tƒr&|| _n| d¡| _d S r`   )rf   r‹   Únamer	   r   )r^   rŒ   r   r   r   rs   Í   s
    


zPdfName.__init__r   rZ   c                 C  s   | j  d¡S r`   )rŒ   rF   r]   r   r   r   Úname_as_strÕ   s    zPdfName.name_as_strc                 C  s    t |tƒr|j| jkp|| jkS r>   )rf   r‹   rŒ   rh   r   r   r   ri   Ø   s    ÿþzPdfName.__eq__rU   c                 C  s
   t | jƒS r>   )rk   rŒ   r]   r   r   r   rl   Ý   s    zPdfName.__hash__c                 C  s   | j j› dt| jƒ› dS )Nú(ú))re   rK   ÚreprrŒ   r]   r   r   r   Ú__repr__à   s    zPdfName.__repr__c                 C  s   | t  |¡ƒS r>   )Ú	PdfParserÚinterpret_name)ÚclsÚdatar   r   r   Úfrom_pdf_streamã   s    zPdfName.from_pdf_streamé!   é   c                 C  s   h | ]}t |ƒ’qS r   )Úord)rB   Úcr   r   r   Ú	<setcomp>ç   s     zPdfName.<setcomp>z#%/()<>[]{}r	   c                 C  s@   t dƒ}| jD ](}|| jkr(| |¡ q| d| ¡ qt|ƒS )Nó   /s   #%02X)Ú	bytearrayrŒ   Úallowed_charsÚappendÚextendr	   )r^   ÚresultrH   r   r   r   rb   é   s    

zPdfName.__bytes__N)rK   rL   rM   rs   r   ri   rl   r‘   Úclassmethodr–   r€   Úrangerž   rb   r   r   r   r   r‹   Ì   s   
r‹   c                   @  s   e Zd Zddœdd„ZdS )ÚPdfArrayr	   rZ   c                 C  s   dd  dd„ | D ƒ¡ d S )Ns   [ ó    c                 s  s   | ]}t |ƒV  qd S r>   )Úpdf_repr)rB   Úxr   r   r   rD   õ   s     z%PdfArray.__bytes__.<locals>.<genexpr>s    ])rG   r]   r   r   r   rb   ô   s    zPdfArray.__bytes__N)rK   rL   rM   rb   r   r   r   r   r¤   ó   s   r¤   c                   @  s*   e Zd Zdd„ Zdd„ Zddœdd„Zd	S )
ÚPdfDictc                 C  s,   |dkrt j | ||¡ n|| | d¡< d S )Nr•   ra   )ÚcollectionsÚUserDictÚ__setattr__r   rt   r   r   r   r«   ÿ   s    zPdfDict.__setattr__c              
   C  s,  z| |  d¡ }W n, tk
r> } zt|ƒ|‚W 5 d }~X Y nX t|tƒrRt|ƒ}| d¡r(| d¡rt|dd … }d}t|ƒdkrÀ|d }t	|dd… ƒd	 }t|ƒd
krÀ|t	|dd
… ƒ7 }dd t|ƒd … }t
 |d t|ƒd … |¡}|dkr(|d	9 }|dkr|d9 }t
 t |¡| ¡}|S )Nra   ÚDatezD:é   ÚZé   é   é   é<   é   é   z%Y%m%d%H%M%S)ú+ú-rµ   éÿÿÿÿ)r   rx   ÚAttributeErrorrf   r	   rI   ÚendswithÚ
startswithrE   rU   ÚtimeÚstrptimeÚgmtimeÚcalendarÚtimegm)r^   ru   rv   ÚeZrelationshipÚoffsetÚformatr   r   r   Ú__getattr__  s.    



zPdfDict.__getattr__r	   rZ   c                 C  sn   t dƒ}|  ¡ D ]J\}}|d kr"qt|ƒ}| d¡ | tt|ƒƒ¡ | d¡ | |¡ q| d¡ t|ƒS )Nó   <<ó   
r¥   s   
>>)r   Úitemsr¦   r    r	   r‹   )r^   Úoutru   rv   r   r   r   rb      s    


zPdfDict.__bytes__N)rK   rL   rM   r«   rÃ   rb   r   r   r   r   r¨   þ   s   r¨   c                   @  s"   e Zd Zdd„ Zddœdd„ZdS )Ú	PdfBinaryc                 C  s
   || _ d S r>   )r•   )r^   r•   r   r   r   rs   /  s    zPdfBinary.__init__r	   rZ   c                 C  s   dd  dd„ | jD ƒ¡ S )Ns   <%s>ó    c                 s  s   | ]}d | V  qdS )s   %02XNr   ©rB   rH   r   r   r   rD   3  s     z&PdfBinary.__bytes__.<locals>.<genexpr>)rG   r•   r]   r   r   r   rb   2  s    zPdfBinary.__bytes__N)rK   rL   rM   rs   rb   r   r   r   r   rÈ   .  s   rÈ   c                   @  s   e Zd Zdd„ Zdd„ ZdS )Ú	PdfStreamc                 C  s   || _ || _d S r>   )Ú
dictionaryÚbuf)r^   rÌ   rÍ   r   r   r   rs   7  s    zPdfStream.__init__c                 C  sŽ   z| j j}W n tk
r&   | j Y S X |dkrnz| j j}W n tk
rX   | j j}Y nX tj| jt|ƒdS dt	| j jƒ› d}t
|ƒ‚d S )Ns   FlateDecode)Úbufsizezstream filter z unknown/unsupported)rÌ   ÚFilterr¸   rÍ   ZDLÚLengthÚzlibÚ
decompressrU   r   ÚNotImplementedError)r^   ÚfilterZexpected_lengthr}   r   r   r   rF   ;  s    zPdfStream.decodeN)rK   rL   rM   rs   rF   r   r   r   r   rË   6  s   rË   c                 C  s
  | dkrdS | dkrdS | d kr$dS t | ttttfƒr>t| ƒS t | ttfƒrZt| ƒ 	d¡S t | t
jƒr€dt
 d| ¡ 	d¡ d	 S t | tƒr–tt| ƒƒS t | tƒr¬tt| ƒƒS t | tƒrÂtt| ƒƒS t | tƒ rþ|  d
d¡} |  dd¡} |  d	d¡} d|  d	 S t| ƒS d S )NTs   trueFs   falses   nullra   s   (D:z%Y%m%d%H%M%SZó   )ó   \s   \\ó   (ó   \(s   \))rf   r‹   r¨   r¤   rÈ   r	   rU   Úfloatr   r   r»   Ústruct_timeÚstrftimeÚdictÚlistr¦   r   Úreplace)r§   r   r   r   r¦   K  s0    


r¦   c                    @  s2  e Zd ZdZdwdd„Zd dœdd	„Zd
ddœdd„Zddœdd„Zddœdd„Zddœdd„Z	ddœdd„Z
ddœdd„Zdd„ Zddœdd„Zddœdd„Zdxd d!„Zd"d#„ Zd$d%„ Zddœd&d'„Zed(d)„ ƒZddœd*d+„Zdyd,d-„Zd.Zd/Zd0Zd1Zed2 Zed3 Zd4Zd5Zee e Ze  !ed6 e d7 e d8 e d9 e d: e d; e j"¡Z#e  !ed6 e d< e d8 e d9 e d: e e j"¡Z$d=d>„ Z%d?d@„ Z&e  !e¡Z'e  !edA e dB ¡Z(e  !edC ¡Z)e  !edD e ¡Z*e+dEdF„ ƒZ,e  !dG¡Z-e+dzdIdJ„ƒZ.e  !edK e dB ¡Z/e  !edL e dB ¡Z0e  !edM e dB ¡Z1e  !edN e dB ¡Z2e  !edO e dB ¡Z3e  !edP ¡Z4e  !edQ ¡Z5e  !edR e dS ¡Z6e  !edT ¡Z7e  !edU e dU e dV e dB ¡Z8e  !edU e dU e dW e dB ¡Z9e  !edX e dB ¡Z:e  !dYe dZ e d[ ¡Z;e  !ed\ ¡Z<e  !ed] e dB ¡Z=e+d{d_d`„ƒZ>e  !da¡Z?dbdcdddedfdgdhdidjdkdYdYdBdBdldle@dbƒdce@ddƒdee@dfƒdge@dhƒdie@djƒdke@dYƒdYe@dBƒdBe@dlƒdliZAe+dmdn„ ƒZBe  !edo e ¡ZCe  !ed9 e d9 e e ¡ZDe  !dp¡ZEdqdr„ ZFd|dsdt„ZGd}dudv„ZHdS )~r’   z|Based on
    https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    Supports PDF up to 1.4
    Nr   Úrbc                 C  s  |r|rd}t |ƒ‚|| _|| _|| _|| _d| _d| _|d k	r^|d kr^t||ƒ | _}d| _|d k	r’|  |¡ | _}d| _|s’t	|dƒr’|j
| _i | _|r¦|  ¡  nTd | _| _tƒ | _d | _tƒ | _d | _i | _g | _g | _d | _d | _i | _tƒ | _d| j_|r|  ¡  d S )Nz4specify buf or f or filename, but not both buf and fFTrŒ   r   )ÚRuntimeErrorÚfilenamerÍ   rˆ   Ústart_offsetÚshould_close_bufÚshould_close_fileÚopenÚget_buf_from_fileÚhasattrrŒ   Úcached_objectsÚread_pdf_infoÚfile_size_totalÚfile_size_thisr¨   ÚrootÚroot_refÚinfoÚinfo_refÚpage_tree_rootÚpagesÚ
orig_pagesÚ	pages_refÚlast_xref_section_offsetÚtrailer_dictrn   Ú
xref_tablerr   Úseek_end)r^   rá   rˆ   rÍ   râ   Úmoder}   r   r   r   rs   n  sF    
zPdfParser.__init__rZ   c                 C  s   | S r>   r   r]   r   r   r   Ú	__enter__”  s    zPdfParser.__enter__rc   rP   )Úargsr   c                 G  s   |   ¡  d S r>   )Úclose)r^   rú   r   r   r   Ú__exit__—  s    zPdfParser.__exit__c                 C  s   |   ¡  |  ¡  d S r>   )Ú	close_bufr÷   r]   r   r   r   Ústart_writingš  s    zPdfParser.start_writingc                 C  s.   z| j  ¡  W n tk
r"   Y nX d | _ d S r>   )rÍ   rû   r¸   r]   r   r   r   rý   ž  s
    zPdfParser.close_bufc                 C  s2   | j r|  ¡  | jd k	r.| jr.| j ¡  d | _d S r>   )rã   rý   rˆ   rä   rû   r]   r   r   r   rû   ¥  s
    
zPdfParser.closec                 C  s   | j  dtj¡ d S )Nr   )rˆ   ÚseekÚosÚSEEK_ENDr]   r   r   r   r÷   ¬  s    zPdfParser.seek_endc                 C  s   | j  d¡ d S )Ns	   %PDF-1.4
)rˆ   r…   r]   r   r   r   Úwrite_header¯  s    zPdfParser.write_headerc                 C  s   | j  d|› d ¡ ¡ d S )Nz% Ú
)rˆ   r…   r   )r^   r
   r   r   r   Úwrite_comment²  s    zPdfParser.write_commentrY   c                 C  sl   |   ¡  |  | j ¡ ¡| _|  d¡| _|  ¡  | j| jtdƒ| jd | j| jtdƒt	| j
ƒ| j
d | jS )Nr   ó   Catalog)ÚTypeZPagesó   Pages)r  ZCountZKids)Údel_rootÚnext_object_idrˆ   r„   rí   ró   Úrewrite_pagesÚ	write_objr‹   rE   rñ   r]   r   r   r   Úwrite_catalogµ  s    üzPdfParser.write_catalogc                 C  sî   g }t | jƒD ]˜\}}| j| }| j|j= | |tdƒ ¡ || jkrHqi }| ¡ D ]\}}||| 	¡ < qT| j
|d< | jd|Ž}t | jƒD ]\}	}
|
|krŠ|| j|	< qŠq|D ]6}|r¬| j| }|j| jkrÔ| j|j= | dd ¡}q°q¬g | _d S )Ns   ParentÚParent)N)r†   rò   rè   rö   rV   rŸ   r‹   rñ   rÆ   r   ró   Ú
write_pager@   )r^   Zpages_tree_nodes_to_deleteÚiZpage_refZ	page_infoZstringified_page_inforu   rv   Znew_page_refÚjZcur_page_refZpages_tree_node_refZpages_tree_noder   r   r   r
  Ã  s,    





zPdfParser.rewrite_pagesc                 C  sš   |r|   ¡  || _| jr(|  d | j¡| _| j | j¡}t| jƒ}| j|dœ}| j	d k	r`| j	|d< | jrp| j|d< || _	| j dt
t|ƒƒ d|  ¡ d S )N)ó   Rootó   Sizeó   Prevó   Infos   trailer
s   
startxref
%d
%%%%EOF)r  rí   rî   r  rï   rö   r…   rˆ   rE   rô   r	   r¨   )r^   Znew_root_refZ
start_xrefZnum_entriesrõ   r   r   r   Úwrite_xref_and_trailerà  s(    




ÿþÿz PdfParser.write_xref_and_trailerc                 O  sL   t |tƒr| j| }d|kr(tdƒ|d< d|kr:| j|d< | j|f|ž|ŽS )Nr  ó   Pager  )rf   rU   rñ   r‹   ró   r  )r^   ÚrefÚobjsÚdict_objr   r   r   r  ô  s    


zPdfParser.write_pagec                 O  sÄ   | j }|d kr|  | ¡ ¡}n| ¡ |jf| j|j< | tt|Ž ƒ¡ | 	dd ¡}|d k	rft
|ƒ|d< |rx| t|ƒ¡ |D ]}| t|ƒ¡ q||d k	r¶| d¡ | |¡ | d¡ | d¡ |S )NÚstreamrÐ   s   stream
s   
endstream
s   endobj
)rˆ   r	  r„   rW   rö   rV   r…   r	   rm   r‡   rE   r¦   )r^   r  r  r  rˆ   r  Úobjr   r   r   r  ý  s$    



zPdfParser.write_objc                 C  s.   | j d krd S | j| j j= | j| jd j= d S )Nr  )rí   rö   rV   rì   r]   r   r   r   r    s    
zPdfParser.del_rootc                 C  sX   t | dƒr|  ¡ S t | dƒr$|  ¡ S ztj|  ¡ dtjdW S  tk
rR   Y dS X d S )NÚ	getbufferÚgetvaluer   )ÚaccessrÉ   )rç   r  r  ÚmmapÚfilenoZACCESS_READÚ
ValueError)rˆ   r   r   r   ræ     s    

zPdfParser.get_buf_from_filec                 C  sü   t | jƒ| _| j| j | _|  ¡  | jd | _| j dd ¡| _	t
|  | j¡ƒ| _| j	d krdt
ƒ | _nt
|  | j	¡ƒ| _td| jkdƒ t| jd dkdƒ td| jkdƒ tt| jd tƒd	ƒ | jd | _|  | j¡| _|  | j¡| _| jd d … | _d S )
Nr  r  ó   Typez/Type missing in Rootr  z/Type in Root is not /Catalogr  z/Pages missing in Rootz+/Pages in Root is not an indirect reference)rE   rÍ   rê   râ   rë   Úread_trailerrõ   rí   r@   rï   r¨   Úread_indirectrì   rî   rS   rf   rY   ró   rð   Úlinearize_page_treerñ   rò   r]   r   r   r   ré   $  s.    

 ÿþzPdfParser.read_pdf_infoc                 C  sX   zt t| j ¡ ƒd dƒ}W n tk
r:   t ddƒ}Y nX |d k	rT|df| j|j< |S )Nr{   r   )rY   Úmaxrö   r   r!  rV   )r^   rÁ   Ú	referencer   r   r   r	  @  s    zPdfParser.next_object_ids   [][()<>{}/%]s$   [][()<>{}/%\000\011\012\014\015\040]s   [\000\011\012\014\015\040]s#   [\000\011\012\014\015\0400-9a-fA-F]ó   *ó   +s   [\000\011\014\040]*s   [\r\n]+s   trailers   <<(.*>>)s	   startxrefs   ([0-9]+)s   %%EOFó   $s	   <<(.*?>>)c                 C  sÀ   t | jƒd }|| jk r| j}| j | j|¡}t|dƒ |}|r^|}| j | j| ¡ d ¡}q<|sf|}| d¡}t| d¡ƒ| _	|  
|¡| _tƒ | _| j| j	d d| jkr¼|  | jd ¡ d S )Né @  ztrailer end not foundé   r{   r­   ©Úxref_section_offsetr  )rE   rÍ   râ   Úre_trailer_endÚsearchrS   ÚstartÚgrouprU   rô   Úinterpret_trailerrõ   rn   rö   Úread_xref_tableÚread_prev_trailer)r^   Zsearch_start_offsetÚmZ
last_matchÚtrailer_datar   r   r   r#  r  s$    



zPdfParser.read_trailerc                 C  sv   | j |d}| j | j||d … ¡}t|dƒ | d¡}tt| d¡ƒ|kdƒ |  |¡}d|krr|  |d ¡ d S )Nr-  r+  zprevious trailer not foundr{   r­   zGxref section offset in previous trailer doesn't match what was expectedr  )	r4  Úre_trailer_prevr0  rÍ   rS   r2  rU   r3  r5  )r^   r.  Ztrailer_offsetr6  r7  rõ   r   r   r   r5  ‡  s    ÿ

þ
zPdfParser.read_prev_trailers   /([!-$&'*-.0-;=?-Z\\^-z|~]+)(?=rÕ   rÄ   s   >>c                 C  s¾   i }d}| j  ||¡}|sT| j ||¡}t|o<| ¡ t|ƒkdt||d … ƒ ƒ q‚|  | d¡¡}|  	|| ¡ ¡\}}|||< qtd|ko˜t
|d tƒdƒ td|ko´t
|d tƒdƒ |S )Nr   z+name not found in trailer, remaining data: r{   r  z&/Size not in trailer or not an integerr  z1/Root not in trailer or not an indirect reference)Úre_nameÚmatchÚre_dict_endrS   ÚendrE   r   r“   r2  Ú	get_valuerf   rU   rY   )r”   r7  ÚtrailerrÁ   r6  ru   rv   r   r   r   r3     s0    ÿþ
þþzPdfParser.interpret_trailers   ([^#]*)(#([0-9a-fA-F]{2}))?Fc                 C  sn   d}| j  |¡D ]B}| d¡rD|| d¡t | d¡ d¡¡ 7 }q|| d¡7 }q|rb| d¡S t|ƒS d S )NrÉ   é   r{   ra   zutf-8)Úre_hashes_in_nameÚfinditerr2  r   ÚfromhexrF   r	   )r”   ÚrawZas_textrŒ   r6  r   r   r   r“   ½  s    
&
zPdfParser.interpret_names   null(?=s   true(?=s   false(?=s   ([-+]?[0-9]+)(?=s)   ([-+]?([0-9]+\.[0-9]*|[0-9]*\.[0-9]+))(?=s   \[ó   ]s   <(s   *)>rØ   s   ([-+]?[0-9]+)s   R(?=s   obj(?=s	   endobj(?=r×   s	   %[^\r\n]*s   )*s   stream\r?\ns   endstream(?=r·   c              
   C  sê  |dkrdS | j  ||¡}|r&| ¡ }| j ||¡}|rètt| d¡ƒdkdƒ tt| d¡ƒdkdƒ t|d kp’|tt| d¡ƒt| d¡ƒƒkdƒ | j|| ¡ |d d\}}|d krÄ|d fS | j	 ||¡}t|d	ƒ || ¡ fS t| d
ƒ | j
 ||¡}|r^tt| d¡ƒdkdƒ tt| d¡ƒdkdƒ tt| d¡ƒt| d¡ƒƒ| ¡ fS | j ||¡}|rÒ| ¡ }i }| j ||¡}|s | j|||d d\}}|d kr¼|d fS | j|||d d\}	}|	||< |d krî|d fS | j ||¡}qŒ| ¡ }| j ||¡}|rÂz| d¡}
t|
ƒ}W n> ttfk
rp } zd|
› d}t|ƒ|‚W 5 d }~X Y nX || ¡ | ¡ | … }| j || ¡ | ¡}t|dƒ | ¡ }tt|ƒ|ƒ}nt|ƒ}||fS | j ||¡}|rX| ¡ }g }| j ||¡}|sL| j|||d d\}	}| |	¡ |d kr:|d fS | j ||¡}q || ¡ fS | j ||¡}|rxd | ¡ fS | j ||¡}|r˜d| ¡ fS | j ||¡}|r¸d| ¡ fS | j ||¡}|rèt|  | d¡¡ƒ| ¡ fS | j ||¡}|rt| d¡ƒ| ¡ fS | j ||¡}|r<t | d¡ƒ| ¡ fS | j! ||¡}|r t"dd„ | d¡D ƒƒ}t#|ƒd dkrˆ| t$dƒ¡ t" %| &d¡¡| ¡ fS | j' ||¡}|rÄ|  (|| ¡ ¡S dt)|||d … ƒ› }t|ƒ‚d S )Nr   )NNr{   z<indirect object definition: object ID must be greater than 0r­   z;indirect object definition: generation must be non-negativez2indirect object definition different than expected)Úmax_nestingz(indirect object definition end not foundz$indirect object definition not foundz;indirect object reference: object ID must be greater than 0z:indirect object reference: generation must be non-negatives   Lengthz&bad or missing Length in stream dict (r   zstream end not foundTFc                 s  s   | ]}|d kr|V  qdS )s   0123456789abcdefABCDEFNr   rÊ   r   r   r   rD   g  s     z&PdfParser.get_value.<locals>.<genexpr>ó   0ra   zunrecognized object: é    )*Ú
re_commentr:  r<  Úre_indirect_def_startrS   rU   r2  rY   r=  Úre_indirect_def_endÚre_indirect_referenceÚre_dict_startr;  Úre_stream_startr@   Ú	TypeErrorr!  rJ   Úre_stream_endrË   r¨   Úre_array_startÚre_array_endrŸ   Úre_nullÚre_trueÚre_falser9  r‹   r“   Úre_intÚre_realrÙ   Úre_string_hexr   rE   r™   rB  rF   Úre_string_litÚget_literal_stringr   )r”   r•   rÁ   Úexpect_indirectrE  r6  rc   r¡   ru   rv   Zstream_len_strZ
stream_lenrÀ   r}   Zstream_dataZ
hex_stringr   r   r   r=  û  sè    þþÿü
 ÿþþ&





ÿzPdfParser.get_valuesF   (\\[nrtbf()\\])|(\\[0-9]{1,3})|(\\(\r\n|\r|\n))|(\r\n|\r|\n)|(\()|(\))ó   nrÅ   ó   ró   ó   tó   	ó   bó   ó   fó   rÖ   c                 C  s  d}t ƒ }| j ||¡D ]ê}| ||| ¡ … ¡ | d¡rX| | j| d¡d  ¡ n¢| d¡r‚| t| d¡dd … dƒ¡ nx| d¡rŽnl| d¡r¤| d¡ nV| d¡rÂ| d	¡ |d7 }n8| d
¡rú|dkrèt	|ƒ| 
¡ f  S | d¡ |d8 }| 
¡ }qd}t|ƒ‚d S )Nr   r{   r­   é   r?  é   rÅ   é   r×   é   rÕ   zunfinished literal string)r   Úre_lit_str_tokenrA  r    r1  r2  Úescaped_charsrŸ   rU   r	   r<  rJ   )r”   r•   rÁ   Znesting_depthr¡   r6  r}   r   r   r   rY  ‹  s.    

 







zPdfParser.get_literal_strings   xrefs+   ([0-9]{10}) ([0-9]{5}) ([fn])( \r| \n|\r\n)c                 C  sö   d}| j  | j|| j ¡}t|dƒ | ¡ }| j | j|¡}|sLt|dƒ qòd}| ¡ }t| d¡ƒ}t| d¡ƒ}t	||| ƒD ]l}| j
 | j|¡}t|dƒ | ¡ }| d¡d	k}|s‚t| d¡ƒ}	t| d¡ƒ|	f}
|| jkr‚|
| j|< q‚q,|S )
NFzxref section start not foundzxref subsection start not foundTr{   r­   zxref entry not foundr?  rb  )Úre_xref_section_startr:  rÍ   râ   rS   r<  Úre_xref_subsection_startrU   r2  r£   Úre_xref_entryrö   )r^   r.  Zsubsection_foundr6  rÁ   Zfirst_objectÚnum_objectsr  Zis_freerW   Z	new_entryr   r   r   r4  °  s:     ÿ
 ÿ

zPdfParser.read_xref_tablec              
   C  st   | j |d  \}}t||d kd|d › d|d › d|› d|› ƒ | j| j|| j t|Ž |dd }|| j|< |S )Nr   r{   zexpected to find generation z for object ID z) in xref table, instead found generation z at offset )rZ  rE  )rö   rS   r=  rÍ   râ   rY   rè   )r^   r  rE  rÁ   rW   rv   r   r   r   r$  Î  s    
"þüû
zPdfParser.read_indirectc                 C  sj   |d kr| j }t|d dkdƒ g }|d D ]8}|  |¡}|d dkrR| |¡ q,| | j|d¡ q,|S )Nr"  r  z%/Type of page tree node is not /Pagess   Kidsr  )Únode)rð   rS   r$  rŸ   r    r%  )r^   rn  rñ   ZkidZ
kid_objectr   r   r   r%  Þ  s    
 ÿ
zPdfParser.linearize_page_tree)NNNr   rß   )N)N)F)Nr·   )r·   )N)IrK   rL   rM   rN   rs   rù   rü   rþ   rý   rû   r÷   r  r  r  r
  r  r  r  r  Ústaticmethodræ   ré   r	  Ú	delimiterZdelimiter_or_wsÚ
whitespaceZwhitespace_or_hexZwhitespace_optionalZwhitespace_mandatoryZwhitespace_optional_no_nlZnewline_onlyÚnewlineÚreÚcompileÚDOTALLr/  r8  r#  r5  Zre_whitespace_optionalr9  rL  r;  r¢   r3  r@  r“   rR  rS  rT  rU  rV  rP  rQ  rW  rX  rK  rI  rJ  rH  rM  rO  r=  rh  r™   ri  rY  rj  rk  rl  r4  r$  r%  r   r   r   r   r’   h  sº  
&
	


ÿþýüûúùø	÷
öõóÿþýüûúùø	÷
öô
ÿþýÿ

ÿÿþýÿÿÿþýüûúùÿ
ÿþýüûúùÿ
ÿÿÿyÿ                ð
ÿþýüûÿ

r’   )%Ú
__future__r   r¾   r   r©   r  r   rs  r»   rÑ   Útypingr   r   r   r   r   r   r?   rI   rà   rJ   rS   rT   rY   rm   rn   r‹   r¤   rª   r   r	   Z	_DictBaser¨   rÈ   rË   r¦   r’   r   r   r   r   Ú<module>   sŠ   ×-Z'0