pyPdf: upgrade from upstream git: 4abdca42a7d8a4
bzr revid: p_christ@hol.gr-20110106111400-hqw1nu5wx1mict4t
This commit is contained in:
parent
660565f56e
commit
73af237c8b
|
@ -1,3 +1,2 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from pdf import PdfFileReader, PdfFileWriter
|
||||
__all__ = ["pdf"]
|
||||
|
|
|
@ -1,253 +1,252 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vim: sw=4:expandtab:foldmethod=marker
|
||||
#
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
"""
|
||||
Implementation of stream filters for PDF.
|
||||
"""
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
from utils import PdfReadError
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
|
||||
try:
|
||||
import zlib
|
||||
def decompress(data):
|
||||
return zlib.decompress(data)
|
||||
def compress(data):
|
||||
return zlib.compress(data)
|
||||
except ImportError:
|
||||
# Unable to import zlib. Attempt to use the System.IO.Compression
|
||||
# library from the .NET framework. (IronPython only)
|
||||
import System
|
||||
from System import IO, Collections, Array
|
||||
def _string_to_bytearr(buf):
|
||||
retval = Array.CreateInstance(System.Byte, len(buf))
|
||||
for i in range(len(buf)):
|
||||
retval[i] = ord(buf[i])
|
||||
return retval
|
||||
def _bytearr_to_string(bytes):
|
||||
retval = ""
|
||||
for i in range(bytes.Length):
|
||||
retval += chr(bytes[i])
|
||||
return retval
|
||||
def _read_bytes(stream):
|
||||
ms = IO.MemoryStream()
|
||||
buf = Array.CreateInstance(System.Byte, 2048)
|
||||
while True:
|
||||
bytes = stream.Read(buf, 0, buf.Length)
|
||||
if bytes == 0:
|
||||
break
|
||||
else:
|
||||
ms.Write(buf, 0, bytes)
|
||||
retval = ms.ToArray()
|
||||
ms.Close()
|
||||
return retval
|
||||
def decompress(data):
|
||||
bytes = _string_to_bytearr(data)
|
||||
ms = IO.MemoryStream()
|
||||
ms.Write(bytes, 0, bytes.Length)
|
||||
ms.Position = 0 # fseek 0
|
||||
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
|
||||
bytes = _read_bytes(gz)
|
||||
retval = _bytearr_to_string(bytes)
|
||||
gz.Close()
|
||||
return retval
|
||||
def compress(data):
|
||||
bytes = _string_to_bytearr(data)
|
||||
ms = IO.MemoryStream()
|
||||
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
|
||||
gz.Write(bytes, 0, bytes.Length)
|
||||
gz.Close()
|
||||
ms.Position = 0 # fseek 0
|
||||
bytes = ms.ToArray()
|
||||
retval = _bytearr_to_string(bytes)
|
||||
ms.Close()
|
||||
return retval
|
||||
|
||||
|
||||
class FlateDecode(object):
|
||||
def decode(data, decodeParms):
|
||||
data = decompress(data)
|
||||
predictor = 1
|
||||
if decodeParms:
|
||||
predictor = decodeParms.get("/Predictor", 1)
|
||||
# predictor 1 == no predictor
|
||||
if predictor != 1:
|
||||
columns = decodeParms["/Columns"]
|
||||
# PNG prediction:
|
||||
if predictor >= 10 and predictor <= 15:
|
||||
output = StringIO()
|
||||
# PNG prediction can vary from row to row
|
||||
rowlength = columns + 1
|
||||
assert len(data) % rowlength == 0
|
||||
prev_rowdata = (0,) * rowlength
|
||||
for row in xrange(len(data) / rowlength):
|
||||
rowdata = [ord(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
|
||||
filterByte = rowdata[0]
|
||||
if filterByte == 0:
|
||||
pass
|
||||
elif filterByte == 1:
|
||||
for i in range(2, rowlength):
|
||||
rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
|
||||
elif filterByte == 2:
|
||||
for i in range(1, rowlength):
|
||||
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
|
||||
else:
|
||||
# unsupported PNG filter
|
||||
raise PdfReadError("Unsupported PNG filter %r" % filterByte)
|
||||
prev_rowdata = rowdata
|
||||
output.write(''.join([chr(x) for x in rowdata[1:]]))
|
||||
data = output.getvalue()
|
||||
else:
|
||||
# unsupported predictor
|
||||
raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
|
||||
return data
|
||||
decode = staticmethod(decode)
|
||||
|
||||
def encode(data):
|
||||
return compress(data)
|
||||
encode = staticmethod(encode)
|
||||
|
||||
class ASCIIHexDecode(object):
|
||||
def decode(data, decodeParms=None):
|
||||
retval = ""
|
||||
char = ""
|
||||
x = 0
|
||||
while True:
|
||||
c = data[x]
|
||||
if c == ">":
|
||||
break
|
||||
elif c.isspace():
|
||||
x += 1
|
||||
continue
|
||||
char += c
|
||||
if len(char) == 2:
|
||||
retval += chr(int(char, base=16))
|
||||
char = ""
|
||||
x += 1
|
||||
assert char == ""
|
||||
return retval
|
||||
decode = staticmethod(decode)
|
||||
|
||||
class ASCII85Decode(object):
|
||||
def decode(data, decodeParms=None):
|
||||
retval = ""
|
||||
group = []
|
||||
x = 0
|
||||
hitEod = False
|
||||
# remove all whitespace from data
|
||||
data = [y for y in data if not (y in ' \n\r\t')]
|
||||
while not hitEod:
|
||||
c = data[x]
|
||||
if len(retval) == 0 and c == "<" and data[x+1] == "~":
|
||||
x += 2
|
||||
continue
|
||||
#elif c.isspace():
|
||||
# x += 1
|
||||
# continue
|
||||
elif c == 'z':
|
||||
assert len(group) == 0
|
||||
retval += '\x00\x00\x00\x00'
|
||||
continue
|
||||
elif c == "~" and data[x+1] == ">":
|
||||
if len(group) != 0:
|
||||
# cannot have a final group of just 1 char
|
||||
assert len(group) > 1
|
||||
cnt = len(group) - 1
|
||||
group += [ 85, 85, 85 ]
|
||||
hitEod = cnt
|
||||
else:
|
||||
break
|
||||
else:
|
||||
c = ord(c) - 33
|
||||
assert c >= 0 and c < 85
|
||||
group += [ c ]
|
||||
if len(group) >= 5:
|
||||
b = group[0] * (85**4) + \
|
||||
group[1] * (85**3) + \
|
||||
group[2] * (85**2) + \
|
||||
group[3] * 85 + \
|
||||
group[4]
|
||||
assert b < (2**32 - 1)
|
||||
c4 = chr((b >> 0) % 256)
|
||||
c3 = chr((b >> 8) % 256)
|
||||
c2 = chr((b >> 16) % 256)
|
||||
c1 = chr(b >> 24)
|
||||
retval += (c1 + c2 + c3 + c4)
|
||||
if hitEod:
|
||||
retval = retval[:-4+hitEod]
|
||||
group = []
|
||||
x += 1
|
||||
return retval
|
||||
decode = staticmethod(decode)
|
||||
|
||||
def decodeStreamData(stream):
|
||||
from generic import NameObject
|
||||
filters = stream.get("/Filter", ())
|
||||
if len(filters) and not isinstance(filters[0], NameObject):
|
||||
# we have a single filter instance
|
||||
filters = (filters,)
|
||||
data = stream._data
|
||||
for filterType in filters:
|
||||
if filterType == "/FlateDecode":
|
||||
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
|
||||
elif filterType == "/ASCIIHexDecode":
|
||||
data = ASCIIHexDecode.decode(data)
|
||||
elif filterType == "/ASCII85Decode":
|
||||
data = ASCII85Decode.decode(data)
|
||||
elif filterType == "/Crypt":
|
||||
decodeParams = stream.get("/DecodeParams", {})
|
||||
if "/Name" not in decodeParams and "/Type" not in decodeParams:
|
||||
pass
|
||||
else:
|
||||
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
|
||||
else:
|
||||
# unsupported filter
|
||||
raise NotImplementedError("unsupported filter %s" % filterType)
|
||||
return data
|
||||
|
||||
if __name__ == "__main__":
|
||||
assert "abc" == ASCIIHexDecode.decode('61\n626\n3>')
|
||||
|
||||
ascii85Test = """
|
||||
<~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
|
||||
O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY
|
||||
i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
|
||||
l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
|
||||
>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>
|
||||
"""
|
||||
ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
|
||||
assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText
|
||||
|
||||
# vim: sw=4:expandtab:foldmethod=marker
|
||||
#
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
"""
|
||||
Implementation of stream filters for PDF.
|
||||
"""
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
from utils import PdfReadError
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
|
||||
try:
|
||||
import zlib
|
||||
def decompress(data):
|
||||
return zlib.decompress(data)
|
||||
def compress(data):
|
||||
return zlib.compress(data)
|
||||
except ImportError:
|
||||
# Unable to import zlib. Attempt to use the System.IO.Compression
|
||||
# library from the .NET framework. (IronPython only)
|
||||
import System
|
||||
from System import IO, Collections, Array
|
||||
def _string_to_bytearr(buf):
|
||||
retval = Array.CreateInstance(System.Byte, len(buf))
|
||||
for i in range(len(buf)):
|
||||
retval[i] = ord(buf[i])
|
||||
return retval
|
||||
def _bytearr_to_string(bytes):
|
||||
retval = ""
|
||||
for i in range(bytes.Length):
|
||||
retval += chr(bytes[i])
|
||||
return retval
|
||||
def _read_bytes(stream):
|
||||
ms = IO.MemoryStream()
|
||||
buf = Array.CreateInstance(System.Byte, 2048)
|
||||
while True:
|
||||
bytes = stream.Read(buf, 0, buf.Length)
|
||||
if bytes == 0:
|
||||
break
|
||||
else:
|
||||
ms.Write(buf, 0, bytes)
|
||||
retval = ms.ToArray()
|
||||
ms.Close()
|
||||
return retval
|
||||
def decompress(data):
|
||||
bytes = _string_to_bytearr(data)
|
||||
ms = IO.MemoryStream()
|
||||
ms.Write(bytes, 0, bytes.Length)
|
||||
ms.Position = 0 # fseek 0
|
||||
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
|
||||
bytes = _read_bytes(gz)
|
||||
retval = _bytearr_to_string(bytes)
|
||||
gz.Close()
|
||||
return retval
|
||||
def compress(data):
|
||||
bytes = _string_to_bytearr(data)
|
||||
ms = IO.MemoryStream()
|
||||
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
|
||||
gz.Write(bytes, 0, bytes.Length)
|
||||
gz.Close()
|
||||
ms.Position = 0 # fseek 0
|
||||
bytes = ms.ToArray()
|
||||
retval = _bytearr_to_string(bytes)
|
||||
ms.Close()
|
||||
return retval
|
||||
|
||||
|
||||
class FlateDecode(object):
|
||||
def decode(data, decodeParms):
|
||||
data = decompress(data)
|
||||
predictor = 1
|
||||
if decodeParms:
|
||||
predictor = decodeParms.get("/Predictor", 1)
|
||||
# predictor 1 == no predictor
|
||||
if predictor != 1:
|
||||
columns = decodeParms["/Columns"]
|
||||
# PNG prediction:
|
||||
if predictor >= 10 and predictor <= 15:
|
||||
output = StringIO()
|
||||
# PNG prediction can vary from row to row
|
||||
rowlength = columns + 1
|
||||
assert len(data) % rowlength == 0
|
||||
prev_rowdata = (0,) * rowlength
|
||||
for row in xrange(len(data) / rowlength):
|
||||
rowdata = [ord(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
|
||||
filterByte = rowdata[0]
|
||||
if filterByte == 0:
|
||||
pass
|
||||
elif filterByte == 1:
|
||||
for i in range(2, rowlength):
|
||||
rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
|
||||
elif filterByte == 2:
|
||||
for i in range(1, rowlength):
|
||||
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
|
||||
else:
|
||||
# unsupported PNG filter
|
||||
raise PdfReadError("Unsupported PNG filter %r" % filterByte)
|
||||
prev_rowdata = rowdata
|
||||
output.write(''.join([chr(x) for x in rowdata[1:]]))
|
||||
data = output.getvalue()
|
||||
else:
|
||||
# unsupported predictor
|
||||
raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
|
||||
return data
|
||||
decode = staticmethod(decode)
|
||||
|
||||
def encode(data):
|
||||
return compress(data)
|
||||
encode = staticmethod(encode)
|
||||
|
||||
class ASCIIHexDecode(object):
|
||||
def decode(data, decodeParms=None):
|
||||
retval = ""
|
||||
char = ""
|
||||
x = 0
|
||||
while True:
|
||||
c = data[x]
|
||||
if c == ">":
|
||||
break
|
||||
elif c.isspace():
|
||||
x += 1
|
||||
continue
|
||||
char += c
|
||||
if len(char) == 2:
|
||||
retval += chr(int(char, base=16))
|
||||
char = ""
|
||||
x += 1
|
||||
assert char == ""
|
||||
return retval
|
||||
decode = staticmethod(decode)
|
||||
|
||||
class ASCII85Decode(object):
|
||||
def decode(data, decodeParms=None):
|
||||
retval = ""
|
||||
group = []
|
||||
x = 0
|
||||
hitEod = False
|
||||
# remove all whitespace from data
|
||||
data = [y for y in data if not (y in ' \n\r\t')]
|
||||
while not hitEod:
|
||||
c = data[x]
|
||||
if len(retval) == 0 and c == "<" and data[x+1] == "~":
|
||||
x += 2
|
||||
continue
|
||||
#elif c.isspace():
|
||||
# x += 1
|
||||
# continue
|
||||
elif c == 'z':
|
||||
assert len(group) == 0
|
||||
retval += '\x00\x00\x00\x00'
|
||||
continue
|
||||
elif c == "~" and data[x+1] == ">":
|
||||
if len(group) != 0:
|
||||
# cannot have a final group of just 1 char
|
||||
assert len(group) > 1
|
||||
cnt = len(group) - 1
|
||||
group += [ 85, 85, 85 ]
|
||||
hitEod = cnt
|
||||
else:
|
||||
break
|
||||
else:
|
||||
c = ord(c) - 33
|
||||
assert c >= 0 and c < 85
|
||||
group += [ c ]
|
||||
if len(group) >= 5:
|
||||
b = group[0] * (85**4) + \
|
||||
group[1] * (85**3) + \
|
||||
group[2] * (85**2) + \
|
||||
group[3] * 85 + \
|
||||
group[4]
|
||||
assert b < (2**32 - 1)
|
||||
c4 = chr((b >> 0) % 256)
|
||||
c3 = chr((b >> 8) % 256)
|
||||
c2 = chr((b >> 16) % 256)
|
||||
c1 = chr(b >> 24)
|
||||
retval += (c1 + c2 + c3 + c4)
|
||||
if hitEod:
|
||||
retval = retval[:-4+hitEod]
|
||||
group = []
|
||||
x += 1
|
||||
return retval
|
||||
decode = staticmethod(decode)
|
||||
|
||||
def decodeStreamData(stream):
|
||||
from generic import NameObject
|
||||
filters = stream.get("/Filter", ())
|
||||
if len(filters) and not isinstance(filters[0], NameObject):
|
||||
# we have a single filter instance
|
||||
filters = (filters,)
|
||||
data = stream._data
|
||||
for filterType in filters:
|
||||
if filterType == "/FlateDecode":
|
||||
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
|
||||
elif filterType == "/ASCIIHexDecode":
|
||||
data = ASCIIHexDecode.decode(data)
|
||||
elif filterType == "/ASCII85Decode":
|
||||
data = ASCII85Decode.decode(data)
|
||||
elif filterType == "/Crypt":
|
||||
decodeParams = stream.get("/DecodeParams", {})
|
||||
if "/Name" not in decodeParams and "/Type" not in decodeParams:
|
||||
pass
|
||||
else:
|
||||
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
|
||||
else:
|
||||
# unsupported filter
|
||||
raise NotImplementedError("unsupported filter %s" % filterType)
|
||||
return data
|
||||
|
||||
if __name__ == "__main__":
|
||||
assert "abc" == ASCIIHexDecode.decode('61\n626\n3>')
|
||||
|
||||
ascii85Test = """
|
||||
<~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
|
||||
O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY
|
||||
i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
|
||||
l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
|
||||
>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>
|
||||
"""
|
||||
ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
|
||||
assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText
|
||||
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vim: sw=4:expandtab:foldmethod=marker
|
||||
#
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
|
@ -207,15 +206,18 @@ class FloatObject(decimal.Decimal, PdfObject):
|
|||
def __new__(cls, value="0", context=None):
|
||||
return decimal.Decimal.__new__(cls, str(value), context)
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
if self == self.to_integral():
|
||||
return str(self.quantize(decimal.Decimal(1)))
|
||||
else:
|
||||
# XXX: this adds useless extraneous zeros.
|
||||
return "%.5f" % self
|
||||
def writeToStream(self, stream, encryption_key):
|
||||
stream.write(str(self))
|
||||
stream.write(repr(self))
|
||||
|
||||
|
||||
class NumberObject(int, PdfObject):
|
||||
def __init__(self, value):
|
||||
int.__init__(self)
|
||||
self = value
|
||||
int.__init__(value)
|
||||
|
||||
def writeToStream(self, stream, encryption_key):
|
||||
stream.write(repr(self))
|
||||
|
@ -301,7 +303,7 @@ def readStringFromStream(stream):
|
|||
elif tok == "t":
|
||||
tok = "\t"
|
||||
elif tok == "b":
|
||||
tok == "\b"
|
||||
tok = "\b"
|
||||
elif tok == "f":
|
||||
tok = "\f"
|
||||
elif tok == "(":
|
||||
|
@ -311,7 +313,17 @@ def readStringFromStream(stream):
|
|||
elif tok == "\\":
|
||||
tok = "\\"
|
||||
elif tok.isdigit():
|
||||
tok += stream.read(2)
|
||||
# "The number ddd may consist of one, two, or three
|
||||
# octal digits; high-order overflow shall be ignored.
|
||||
# Three octal digits shall be used, with leading zeros
|
||||
# as needed, if the next character of the string is also
|
||||
# a digit." (PDF reference 7.3.4.2, p 16)
|
||||
for i in range(2):
|
||||
ntok = stream.read(1)
|
||||
if ntok.isdigit():
|
||||
tok += ntok
|
||||
else:
|
||||
break
|
||||
tok = chr(int(tok, base=8))
|
||||
elif tok in "\n\r":
|
||||
# This case is hit when a backslash followed by a line
|
||||
|
@ -405,8 +417,7 @@ class NameObject(str, PdfObject):
|
|||
delimiterCharacters = "(", ")", "<", ">", "[", "]", "{", "}", "/", "%"
|
||||
|
||||
def __init__(self, data):
|
||||
str.__init__(self)
|
||||
self = data
|
||||
str.__init__(data)
|
||||
|
||||
def writeToStream(self, stream, encryption_key):
|
||||
stream.write(self)
|
||||
|
@ -710,6 +721,12 @@ class RectangleObject(ArrayObject):
|
|||
def setUpperRight(self, value):
|
||||
self[2], self[3] = [self.ensureIsNumber(x) for x in value]
|
||||
|
||||
def getWidth(self):
|
||||
return self.getUpperRight_x() - self.getLowerLeft_x()
|
||||
|
||||
def getHeight(self):
|
||||
return self.getUpperRight_y() - self.getLowerLeft_x()
|
||||
|
||||
lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
|
||||
lowerRight = property(getLowerRight, setLowerRight, None, None)
|
||||
upperLeft = property(getUpperLeft, setUpperLeft, None, None)
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# vim: sw=4:expandtab:foldmethod=marker
|
||||
#
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
|
@ -39,7 +40,9 @@ It may be a solid base for future PDF file work in Python.
|
|||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
import math
|
||||
import struct
|
||||
from sys import version_info
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
|
@ -51,6 +54,14 @@ import warnings
|
|||
from generic import *
|
||||
from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
|
||||
|
||||
if version_info < ( 2, 4 ):
|
||||
from sets import ImmutableSet as frozenset
|
||||
|
||||
if version_info < ( 2, 5 ):
|
||||
from md5 import md5
|
||||
else:
|
||||
from hashlib import md5
|
||||
|
||||
##
|
||||
# This class supports writing PDF files out, given pages produced by another
|
||||
# class (typically {@link #PdfFileReader PdfFileReader}).
|
||||
|
@ -92,6 +103,21 @@ class PdfFileWriter(object):
|
|||
raise ValueError("pdf must be self")
|
||||
return self._objects[ido.idnum - 1]
|
||||
|
||||
##
|
||||
# Common method for inserting or adding a page to this PDF file.
|
||||
#
|
||||
# @param page The page to add to the document. This argument should be
|
||||
# an instance of {@link #PageObject PageObject}.
|
||||
# @param action The function which will insert the page in the dictionnary.
|
||||
# Takes: page list, page to add.
|
||||
def _addPage(self, page, action):
|
||||
assert page["/Type"] == "/Page"
|
||||
page[NameObject("/Parent")] = self._pages
|
||||
page = self._addObject(page)
|
||||
pages = self.getObject(self._pages)
|
||||
action(pages["/Kids"], page)
|
||||
pages[NameObject("/Count")] = NumberObject(pages["/Count"] + 1)
|
||||
|
||||
##
|
||||
# Adds a page to this PDF file. The page is usually acquired from a
|
||||
# {@link #PdfFileReader PdfFileReader} instance.
|
||||
|
@ -101,12 +127,64 @@ class PdfFileWriter(object):
|
|||
# @param page The page to add to the document. This argument should be
|
||||
# an instance of {@link #PageObject PageObject}.
|
||||
def addPage(self, page):
|
||||
assert page["/Type"] == "/Page"
|
||||
page[NameObject("/Parent")] = self._pages
|
||||
page = self._addObject(page)
|
||||
self._addPage(page, list.append)
|
||||
|
||||
##
|
||||
# Insert a page in this PDF file. The page is usually acquired from a
|
||||
# {@link #PdfFileReader PdfFileReader} instance.
|
||||
#
|
||||
# @param page The page to add to the document. This argument should be
|
||||
# an instance of {@link #PageObject PageObject}.
|
||||
# @param index Position at which the page will be inserted.
|
||||
def insertPage(self, page, index=0):
|
||||
self._addPage(page, lambda l, p: l.insert(index, p))
|
||||
|
||||
##
|
||||
# Retrieves a page by number from this PDF file.
|
||||
# @return Returns a {@link #PageObject PageObject} instance.
|
||||
def getPage(self, pageNumber):
|
||||
pages = self.getObject(self._pages)
|
||||
pages["/Kids"].append(page)
|
||||
pages[NameObject("/Count")] = NumberObject(pages["/Count"] + 1)
|
||||
# XXX: crude hack
|
||||
return pages["/Kids"][pageNumber].getObject()
|
||||
|
||||
##
|
||||
# Return the number of pages.
|
||||
# @return The number of pages.
|
||||
def getNumPages(self):
|
||||
pages = self.getObject(self._pages)
|
||||
return int(pages[NameObject("/Count")])
|
||||
|
||||
##
|
||||
# Append a blank page to this PDF file and returns it. If no page size
|
||||
# is specified, use the size of the last page; throw
|
||||
# PageSizeNotDefinedError if it doesn't exist.
|
||||
# @param width The width of the new page expressed in default user
|
||||
# space units.
|
||||
# @param height The height of the new page expressed in default user
|
||||
# space units.
|
||||
def addBlankPage(self, width=None, height=None):
|
||||
page = PageObject.createBlankPage(self, width, height)
|
||||
self.addPage(page)
|
||||
return page
|
||||
|
||||
##
|
||||
# Insert a blank page to this PDF file and returns it. If no page size
|
||||
# is specified, use the size of the page in the given index; throw
|
||||
# PageSizeNotDefinedError if it doesn't exist.
|
||||
# @param width The width of the new page expressed in default user
|
||||
# space units.
|
||||
# @param height The height of the new page expressed in default user
|
||||
# space units.
|
||||
# @param index Position to add the page.
|
||||
def insertBlankPage(self, width=None, height=None, index=0):
|
||||
if width is None or height is None and \
|
||||
(self.getNumPages() - 1) >= index:
|
||||
oldpage = self.getPage(index)
|
||||
width = oldpage.mediaBox.getWidth()
|
||||
height = oldpage.mediaBox.getHeight()
|
||||
page = PageObject.createBlankPage(self, width, height)
|
||||
self.insertPage(page, index)
|
||||
return page
|
||||
|
||||
##
|
||||
# Encrypt this PDF file with the PDF Standard encryption handler.
|
||||
|
@ -119,7 +197,7 @@ class PdfFileWriter(object):
|
|||
# encryption. When false, 40bit encryption will be used. By default, this
|
||||
# flag is on.
|
||||
def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
|
||||
import md5, time, random
|
||||
import time, random
|
||||
if owner_pwd == None:
|
||||
owner_pwd = user_pwd
|
||||
if use_128bit:
|
||||
|
@ -133,8 +211,8 @@ class PdfFileWriter(object):
|
|||
# permit everything:
|
||||
P = -1
|
||||
O = ByteStringObject(_alg33(owner_pwd, user_pwd, rev, keylen))
|
||||
ID_1 = md5.new(repr(time.time())).digest()
|
||||
ID_2 = md5.new(repr(random.random())).digest()
|
||||
ID_1 = md5(repr(time.time())).digest()
|
||||
ID_2 = md5(repr(random.random())).digest()
|
||||
self._ID = ArrayObject((ByteStringObject(ID_1), ByteStringObject(ID_2)))
|
||||
if rev == 2:
|
||||
U, key = _alg34(user_pwd, O, P, ID_1)
|
||||
|
@ -160,9 +238,28 @@ class PdfFileWriter(object):
|
|||
# @param stream An object to write the file to. The object must support
|
||||
# the write method, and the tell method, similar to a file object.
|
||||
def write(self, stream):
|
||||
import struct, md5
|
||||
import struct
|
||||
|
||||
externalReferenceMap = {}
|
||||
|
||||
# PDF objects sometimes have circular references to their /Page objects
|
||||
# inside their object tree (for example, annotations). Those will be
|
||||
# indirect references to objects that we've recreated in this PDF. To
|
||||
# address this problem, PageObject's store their original object
|
||||
# reference number, and we add it to the external reference map before
|
||||
# we sweep for indirect references. This forces self-page-referencing
|
||||
# trees to reference the correct new object location, rather than
|
||||
# copying in a new copy of the page object.
|
||||
for objIndex in xrange(len(self._objects)):
|
||||
obj = self._objects[objIndex]
|
||||
if isinstance(obj, PageObject) and obj.indirectRef != None:
|
||||
data = obj.indirectRef
|
||||
if not externalReferenceMap.has_key(data.pdf):
|
||||
externalReferenceMap[data.pdf] = {}
|
||||
if not externalReferenceMap[data.pdf].has_key(data.generation):
|
||||
externalReferenceMap[data.pdf][data.generation] = {}
|
||||
externalReferenceMap[data.pdf][data.generation][data.idnum] = IndirectObject(objIndex + 1, 0, self)
|
||||
|
||||
self.stack = []
|
||||
self._sweepIndirectReferences(externalReferenceMap, self._root)
|
||||
del self.stack
|
||||
|
@ -181,7 +278,7 @@ class PdfFileWriter(object):
|
|||
pack2 = struct.pack("<i", 0)[:2]
|
||||
key = self._encrypt_key + pack1 + pack2
|
||||
assert len(key) == (len(self._encrypt_key) + 5)
|
||||
md5_hash = md5.new(key).digest()
|
||||
md5_hash = md5(key).digest()
|
||||
key = md5_hash[:min(16, len(self._encrypt_key) + 5)]
|
||||
obj.writeToStream(stream, key)
|
||||
stream.write("\nendobj\n")
|
||||
|
@ -487,7 +584,7 @@ class PdfFileReader(object):
|
|||
pages = property(lambda self: ConvertFunctionsToVirtualList(self.getNumPages, self.getPage),
|
||||
None, None)
|
||||
|
||||
def _flatten(self, pages=None, inherit=None):
|
||||
def _flatten(self, pages=None, inherit=None, indirectRef=None):
|
||||
inheritablePageAttributes = (
|
||||
NameObject("/Resources"), NameObject("/MediaBox"),
|
||||
NameObject("/CropBox"), NameObject("/Rotate")
|
||||
|
@ -504,14 +601,17 @@ class PdfFileReader(object):
|
|||
if pages.has_key(attr):
|
||||
inherit[attr] = pages[attr]
|
||||
for page in pages["/Kids"]:
|
||||
self._flatten(page.getObject(), inherit)
|
||||
addt = {}
|
||||
if isinstance(page, IndirectObject):
|
||||
addt["indirectRef"] = page
|
||||
self._flatten(page.getObject(), inherit, **addt)
|
||||
elif t == "/Page":
|
||||
for attr,value in inherit.items():
|
||||
# if the page has it's own value, it does not inherit the
|
||||
# parent's value:
|
||||
if not pages.has_key(attr):
|
||||
pages[attr] = value
|
||||
pageObj = PageObject(self)
|
||||
pageObj = PageObject(self, indirectRef)
|
||||
pageObj.update(pages)
|
||||
self.flattenedPages.append(pageObj)
|
||||
|
||||
|
@ -554,12 +654,12 @@ class PdfFileReader(object):
|
|||
if not hasattr(self, '_decryption_key'):
|
||||
raise Exception, "file has not been decrypted"
|
||||
# otherwise, decrypt here...
|
||||
import struct, md5
|
||||
import struct
|
||||
pack1 = struct.pack("<i", indirectReference.idnum)[:3]
|
||||
pack2 = struct.pack("<i", indirectReference.generation)[:2]
|
||||
key = self._decryption_key + pack1 + pack2
|
||||
assert len(key) == (len(self._decryption_key) + 5)
|
||||
md5_hash = md5.new(key).digest()
|
||||
md5_hash = md5(key).digest()
|
||||
key = md5_hash[:min(16, len(self._decryption_key) + 5)]
|
||||
retval = self._decryptObject(retval, key)
|
||||
|
||||
|
@ -890,11 +990,46 @@ def createRectangleAccessor(name, fallback):
|
|||
##
|
||||
# This class represents a single page within a PDF file. Typically this object
|
||||
# will be created by accessing the {@link #PdfFileReader.getPage getPage}
|
||||
# function of the {@link #PdfFileReader PdfFileReader} class.
|
||||
# function of the {@link #PdfFileReader PdfFileReader} class, but it is
|
||||
# also possible to create an empty page with the createBlankPage static
|
||||
# method.
|
||||
# @param pdf PDF file the page belongs to (optional, defaults to None).
|
||||
class PageObject(DictionaryObject):
|
||||
def __init__(self, pdf):
|
||||
def __init__(self, pdf=None, indirectRef=None):
|
||||
DictionaryObject.__init__(self)
|
||||
self.pdf = pdf
|
||||
# Stores the original indirect reference to this object in its source PDF
|
||||
self.indirectRef = indirectRef
|
||||
|
||||
##
|
||||
# Returns a new blank page.
|
||||
# If width or height is None, try to get the page size from the
|
||||
# last page of pdf. If pdf is None or contains no page, a
|
||||
# PageSizeNotDefinedError is raised.
|
||||
# @param pdf PDF file the page belongs to
|
||||
# @param width The width of the new page expressed in default user
|
||||
# space units.
|
||||
# @param height The height of the new page expressed in default user
|
||||
# space units.
|
||||
def createBlankPage(pdf=None, width=None, height=None):
|
||||
page = PageObject(pdf)
|
||||
|
||||
# Creates a new page (cf PDF Reference 7.7.3.3)
|
||||
page.__setitem__(NameObject('/Type'), NameObject('/Page'))
|
||||
page.__setitem__(NameObject('/Parent'), NullObject())
|
||||
page.__setitem__(NameObject('/Resources'), DictionaryObject())
|
||||
if width is None or height is None:
|
||||
if pdf is not None and pdf.getNumPages() > 0:
|
||||
lastpage = pdf.getPage(pdf.getNumPages() - 1)
|
||||
width = lastpage.mediaBox.getWidth()
|
||||
height = lastpage.mediaBox.getHeight()
|
||||
else:
|
||||
raise utils.PageSizeNotDefinedError()
|
||||
page.__setitem__(NameObject('/MediaBox'),
|
||||
RectangleObject([0, 0, width, height]))
|
||||
|
||||
return page
|
||||
createBlankPage = staticmethod(createBlankPage)
|
||||
|
||||
##
|
||||
# Rotates a page clockwise by increments of 90 degrees.
|
||||
|
@ -931,7 +1066,7 @@ class PageObject(DictionaryObject):
|
|||
renameRes[key] = newname
|
||||
newRes[newname] = page2Res[key]
|
||||
elif not newRes.has_key(key):
|
||||
newRes[key] = page2Res[key]
|
||||
newRes[key] = page2Res.raw_get(key)
|
||||
return newRes, renameRes
|
||||
_mergeResources = staticmethod(_mergeResources)
|
||||
|
||||
|
@ -957,6 +1092,26 @@ class PageObject(DictionaryObject):
|
|||
return stream
|
||||
_pushPopGS = staticmethod(_pushPopGS)
|
||||
|
||||
def _addTransformationMatrix(contents, pdf, ctm):
|
||||
# adds transformation matrix at the beginning of the given
|
||||
# contents stream.
|
||||
a, b, c, d, e, f = ctm
|
||||
contents = ContentStream(contents, pdf)
|
||||
contents.operations.insert(0, [[FloatObject(a), FloatObject(b),
|
||||
FloatObject(c), FloatObject(d), FloatObject(e),
|
||||
FloatObject(f)], " cm"])
|
||||
return contents
|
||||
_addTransformationMatrix = staticmethod(_addTransformationMatrix)
|
||||
|
||||
##
|
||||
# Returns the /Contents object, or None if it doesn't exist.
|
||||
# /Contents is optionnal, as described in PDF Reference 7.7.3.3
|
||||
def getContents(self):
|
||||
if self.has_key("/Contents"):
|
||||
return self["/Contents"].getObject()
|
||||
else:
|
||||
return None
|
||||
|
||||
##
|
||||
# Merges the content streams of two pages into one. Resource references
|
||||
# (i.e. fonts) are maintained from both pages. The mediabox/cropbox/etc
|
||||
|
@ -968,7 +1123,23 @@ class PageObject(DictionaryObject):
|
|||
# @param page2 An instance of {@link #PageObject PageObject} to be merged
|
||||
# into this one.
|
||||
def mergePage(self, page2):
|
||||
self._mergePage(page2)
|
||||
|
||||
##
|
||||
# Actually merges the content streams of two pages into one. Resource
|
||||
# references (i.e. fonts) are maintained from both pages. The
|
||||
# mediabox/cropbox/etc of this page are not altered. The parameter page's
|
||||
# content stream will be added to the end of this page's content stream,
|
||||
# meaning that it will be drawn after, or "on top" of this page.
|
||||
#
|
||||
# @param page2 An instance of {@link #PageObject PageObject} to be merged
|
||||
# into this one.
|
||||
# @param page2transformation A fuction which applies a transformation to
|
||||
# the content stream of page2. Takes: page2
|
||||
# contents stream. Must return: new contents
|
||||
# stream. If omitted, the content stream will
|
||||
# not be modified.
|
||||
def _mergePage(self, page2, page2transformation=None):
|
||||
# First we work on merging the resource dictionaries. This allows us
|
||||
# to find out what symbols in the content streams we might need to
|
||||
# rename.
|
||||
|
@ -978,7 +1149,7 @@ class PageObject(DictionaryObject):
|
|||
originalResources = self["/Resources"].getObject()
|
||||
page2Resources = page2["/Resources"].getObject()
|
||||
|
||||
for res in "/ExtGState", "/Font", "/XObject", "/ColorSpace", "/Pattern", "/Shading":
|
||||
for res in "/ExtGState", "/Font", "/XObject", "/ColorSpace", "/Pattern", "/Shading", "/Properties":
|
||||
new, newrename = PageObject._mergeResources(originalResources, page2Resources, res)
|
||||
if new:
|
||||
newResources[NameObject(res)] = new
|
||||
|
@ -993,17 +1164,191 @@ class PageObject(DictionaryObject):
|
|||
|
||||
newContentArray = ArrayObject()
|
||||
|
||||
originalContent = self["/Contents"].getObject()
|
||||
newContentArray.append(PageObject._pushPopGS(originalContent, self.pdf))
|
||||
originalContent = self.getContents()
|
||||
if originalContent is not None:
|
||||
newContentArray.append(PageObject._pushPopGS(
|
||||
originalContent, self.pdf))
|
||||
|
||||
page2Content = page2['/Contents'].getObject()
|
||||
page2Content = PageObject._contentStreamRename(page2Content, rename, self.pdf)
|
||||
page2Content = PageObject._pushPopGS(page2Content, self.pdf)
|
||||
newContentArray.append(page2Content)
|
||||
page2Content = page2.getContents()
|
||||
if page2Content is not None:
|
||||
if page2transformation is not None:
|
||||
page2Content = page2transformation(page2Content)
|
||||
page2Content = PageObject._contentStreamRename(
|
||||
page2Content, rename, self.pdf)
|
||||
page2Content = PageObject._pushPopGS(page2Content, self.pdf)
|
||||
newContentArray.append(page2Content)
|
||||
|
||||
self[NameObject('/Contents')] = ContentStream(newContentArray, self.pdf)
|
||||
self[NameObject('/Resources')] = newResources
|
||||
|
||||
##
|
||||
# This is similar to mergePage, but a transformation matrix is
|
||||
# applied to the merged stream.
|
||||
#
|
||||
# @param page2 An instance of {@link #PageObject PageObject} to be merged.
|
||||
# @param ctm A 6 elements tuple containing the operands of the
|
||||
# transformation matrix
|
||||
def mergeTransformedPage(self, page2, ctm):
|
||||
self._mergePage(page2, lambda page2Content:
|
||||
PageObject._addTransformationMatrix(page2Content, page2.pdf, ctm))
|
||||
|
||||
##
|
||||
# This is similar to mergePage, but the stream to be merged is scaled
|
||||
# by appling a transformation matrix.
|
||||
#
|
||||
# @param page2 An instance of {@link #PageObject PageObject} to be merged.
|
||||
# @param factor The scaling factor
|
||||
def mergeScaledPage(self, page2, factor):
|
||||
# CTM to scale : [ sx 0 0 sy 0 0 ]
|
||||
return self.mergeTransformedPage(page2, [factor, 0,
|
||||
0, factor,
|
||||
0, 0])
|
||||
|
||||
##
|
||||
# This is similar to mergePage, but the stream to be merged is rotated
|
||||
# by appling a transformation matrix.
|
||||
#
|
||||
# @param page2 An instance of {@link #PageObject PageObject} to be merged.
|
||||
# @param rotation The angle of the rotation, in degrees
|
||||
def mergeRotatedPage(self, page2, rotation):
|
||||
rotation = math.radians(rotation)
|
||||
return self.mergeTransformedPage(page2,
|
||||
[math.cos(rotation), math.sin(rotation),
|
||||
-math.sin(rotation), math.cos(rotation),
|
||||
0, 0])
|
||||
|
||||
##
|
||||
# This is similar to mergePage, but the stream to be merged is translated
|
||||
# by appling a transformation matrix.
|
||||
#
|
||||
# @param page2 An instance of {@link #PageObject PageObject} to be merged.
|
||||
# @param tx The translation on X axis
|
||||
# @param tx The translation on Y axis
|
||||
def mergeTranslatedPage(self, page2, tx, ty):
|
||||
return self.mergeTransformedPage(page2, [1, 0,
|
||||
0, 1,
|
||||
tx, ty])
|
||||
|
||||
##
|
||||
# This is similar to mergePage, but the stream to be merged is rotated
|
||||
# and scaled by appling a transformation matrix.
|
||||
#
|
||||
# @param page2 An instance of {@link #PageObject PageObject} to be merged.
|
||||
# @param rotation The angle of the rotation, in degrees
|
||||
# @param factor The scaling factor
|
||||
def mergeRotatedScaledPage(self, page2, rotation, scale):
|
||||
rotation = math.radians(rotation)
|
||||
rotating = [[math.cos(rotation), math.sin(rotation),0],
|
||||
[-math.sin(rotation),math.cos(rotation), 0],
|
||||
[0, 0, 1]]
|
||||
scaling = [[scale,0, 0],
|
||||
[0, scale,0],
|
||||
[0, 0, 1]]
|
||||
ctm = utils.matrixMultiply(rotating, scaling)
|
||||
|
||||
return self.mergeTransformedPage(page2,
|
||||
[ctm[0][0], ctm[0][1],
|
||||
ctm[1][0], ctm[1][1],
|
||||
ctm[2][0], ctm[2][1]])
|
||||
|
||||
##
|
||||
# This is similar to mergePage, but the stream to be merged is translated
|
||||
# and scaled by appling a transformation matrix.
|
||||
#
|
||||
# @param page2 An instance of {@link #PageObject PageObject} to be merged.
|
||||
# @param scale The scaling factor
|
||||
# @param tx The translation on X axis
|
||||
# @param tx The translation on Y axis
|
||||
def mergeScaledTranslatedPage(self, page2, scale, tx, ty):
|
||||
translation = [[1, 0, 0],
|
||||
[0, 1, 0],
|
||||
[tx,ty,1]]
|
||||
scaling = [[scale,0, 0],
|
||||
[0, scale,0],
|
||||
[0, 0, 1]]
|
||||
ctm = utils.matrixMultiply(scaling, translation)
|
||||
|
||||
return self.mergeTransformedPage(page2, [ctm[0][0], ctm[0][1],
|
||||
ctm[1][0], ctm[1][1],
|
||||
ctm[2][0], ctm[2][1]])
|
||||
|
||||
##
|
||||
# This is similar to mergePage, but the stream to be merged is translated,
|
||||
# rotated and scaled by appling a transformation matrix.
|
||||
#
|
||||
# @param page2 An instance of {@link #PageObject PageObject} to be merged.
|
||||
# @param tx The translation on X axis
|
||||
# @param ty The translation on Y axis
|
||||
# @param rotation The angle of the rotation, in degrees
|
||||
# @param scale The scaling factor
|
||||
def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty):
|
||||
translation = [[1, 0, 0],
|
||||
[0, 1, 0],
|
||||
[tx,ty,1]]
|
||||
rotation = math.radians(rotation)
|
||||
rotating = [[math.cos(rotation), math.sin(rotation),0],
|
||||
[-math.sin(rotation),math.cos(rotation), 0],
|
||||
[0, 0, 1]]
|
||||
scaling = [[scale,0, 0],
|
||||
[0, scale,0],
|
||||
[0, 0, 1]]
|
||||
ctm = utils.matrixMultiply(rotating, scaling)
|
||||
ctm = utils.matrixMultiply(ctm, translation)
|
||||
|
||||
return self.mergeTransformedPage(page2, [ctm[0][0], ctm[0][1],
|
||||
ctm[1][0], ctm[1][1],
|
||||
ctm[2][0], ctm[2][1]])
|
||||
|
||||
##
|
||||
# Applys a transformation matrix the page.
|
||||
#
|
||||
# @param ctm A 6 elements tuple containing the operands of the
|
||||
# transformation matrix
|
||||
def addTransformation(self, ctm):
|
||||
originalContent = self.getContents()
|
||||
if originalContent is not None:
|
||||
newContent = PageObject._addTransformationMatrix(
|
||||
originalContent, self.pdf, ctm)
|
||||
newContent = PageObject._pushPopGS(newContent, self.pdf)
|
||||
self[NameObject('/Contents')] = newContent
|
||||
|
||||
##
|
||||
# Scales a page by the given factors by appling a transformation
|
||||
# matrix to its content and updating the page size.
|
||||
#
|
||||
# @param sx The scaling factor on horizontal axis
|
||||
# @param sy The scaling factor on vertical axis
|
||||
def scale(self, sx, sy):
|
||||
self.addTransformation([sx, 0,
|
||||
0, sy,
|
||||
0, 0])
|
||||
self.mediaBox = RectangleObject([
|
||||
float(self.mediaBox.getLowerLeft_x()) * sx,
|
||||
float(self.mediaBox.getLowerLeft_y()) * sy,
|
||||
float(self.mediaBox.getUpperRight_x()) * sx,
|
||||
float(self.mediaBox.getUpperRight_y()) * sy])
|
||||
|
||||
##
|
||||
# Scales a page by the given factor by appling a transformation
|
||||
# matrix to its content and updating the page size.
|
||||
#
|
||||
# @param factor The scaling factor
|
||||
def scaleBy(self, factor):
|
||||
self.scale(factor, factor)
|
||||
|
||||
##
|
||||
# Scales a page to the specified dimentions by appling a
|
||||
# transformation matrix to its content and updating the page size.
|
||||
#
|
||||
# @param width The new width
|
||||
# @param height The new heigth
|
||||
def scaleTo(self, width, height):
|
||||
sx = width / (self.mediaBox.getUpperRight_x() -
|
||||
self.mediaBox.getLowerLeft_x ())
|
||||
sy = height / (self.mediaBox.getUpperRight_y() -
|
||||
self.mediaBox.getLowerLeft_x ())
|
||||
self.scale(sx, sy)
|
||||
|
||||
##
|
||||
# Compresses the size of this page by joining all content streams and
|
||||
# applying a FlateDecode filter.
|
||||
|
@ -1012,10 +1357,11 @@ class PageObject(DictionaryObject):
|
|||
# However, it is possible that this function will perform no action if
|
||||
# content stream compression becomes "automatic" for some reason.
|
||||
def compressContentStreams(self):
|
||||
content = self["/Contents"].getObject()
|
||||
if not isinstance(content, ContentStream):
|
||||
content = ContentStream(content, self.pdf)
|
||||
self[NameObject("/Contents")] = content.flateEncode()
|
||||
content = self.getContents()
|
||||
if content is not None:
|
||||
if not isinstance(content, ContentStream):
|
||||
content = ContentStream(content, self.pdf)
|
||||
self[NameObject("/Contents")] = content.flateEncode()
|
||||
|
||||
##
|
||||
# Locate all text drawing commands, in the order they are provided in the
|
||||
|
@ -1369,8 +1715,8 @@ def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
|
|||
password = (password + _encryption_padding)[:32]
|
||||
# 2. Initialize the MD5 hash function and pass the result of step 1 as
|
||||
# input to this function.
|
||||
import md5, struct
|
||||
m = md5.new(password)
|
||||
import struct
|
||||
m = md5(password)
|
||||
# 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash
|
||||
# function.
|
||||
m.update(owner_entry)
|
||||
|
@ -1394,7 +1740,7 @@ def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
|
|||
# /Length entry.
|
||||
if rev >= 3:
|
||||
for i in range(50):
|
||||
md5_hash = md5.new(md5_hash[:keylen]).digest()
|
||||
md5_hash = md5(md5_hash[:keylen]).digest()
|
||||
# 9. Set the encryption key to the first n bytes of the output from the
|
||||
# final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or
|
||||
# greater, depends on the value of the encryption dictionary's /Length
|
||||
|
@ -1436,14 +1782,13 @@ def _alg33_1(password, rev, keylen):
|
|||
password = (password + _encryption_padding)[:32]
|
||||
# 2. Initialize the MD5 hash function and pass the result of step 1 as
|
||||
# input to this function.
|
||||
import md5
|
||||
m = md5.new(password)
|
||||
m = md5(password)
|
||||
# 3. (Revision 3 or greater) Do the following 50 times: Take the output
|
||||
# from the previous MD5 hash and pass it as input into a new MD5 hash.
|
||||
md5_hash = m.digest()
|
||||
if rev >= 3:
|
||||
for i in range(50):
|
||||
md5_hash = md5.new(md5_hash).digest()
|
||||
md5_hash = md5(md5_hash).digest()
|
||||
# 4. Create an RC4 encryption key using the first n bytes of the output
|
||||
# from the final MD5 hash, where n is always 5 for revision 2 but, for
|
||||
# revision 3 or greater, depends on the value of the encryption
|
||||
|
@ -1473,8 +1818,7 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
|
|||
key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
|
||||
# 2. Initialize the MD5 hash function and pass the 32-byte padding string
|
||||
# shown in step 1 of Algorithm 3.2 as input to this function.
|
||||
import md5
|
||||
m = md5.new()
|
||||
m = md5()
|
||||
m.update(_encryption_padding)
|
||||
# 3. Pass the first element of the file's file identifier array (the value
|
||||
# of the ID entry in the document's trailer dictionary; see Table 3.13 on
|
||||
|
|
|
@ -1,111 +1,122 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vim: sw=4:expandtab:foldmethod=marker
|
||||
#
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
"""
|
||||
Utility functions for PDF library.
|
||||
"""
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
#ENABLE_PSYCO = False
|
||||
#if ENABLE_PSYCO:
|
||||
# try:
|
||||
# import psyco
|
||||
# except ImportError:
|
||||
# ENABLE_PSYCO = False
|
||||
#
|
||||
#if not ENABLE_PSYCO:
|
||||
# class psyco:
|
||||
# def proxy(func):
|
||||
# return func
|
||||
# proxy = staticmethod(proxy)
|
||||
|
||||
def readUntilWhitespace(stream, maxchars=None):
|
||||
txt = ""
|
||||
while True:
|
||||
tok = stream.read(1)
|
||||
if tok.isspace() or not tok:
|
||||
break
|
||||
txt += tok
|
||||
if len(txt) == maxchars:
|
||||
break
|
||||
return txt
|
||||
|
||||
def readNonWhitespace(stream):
|
||||
tok = ' '
|
||||
while tok == '\n' or tok == '\r' or tok == ' ' or tok == '\t':
|
||||
tok = stream.read(1)
|
||||
return tok
|
||||
|
||||
class ConvertFunctionsToVirtualList(object):
|
||||
def __init__(self, lengthFunction, getFunction):
|
||||
self.lengthFunction = lengthFunction
|
||||
self.getFunction = getFunction
|
||||
|
||||
def __len__(self):
|
||||
return self.lengthFunction()
|
||||
|
||||
def __getitem__(self, index):
|
||||
if not isinstance(index, int):
|
||||
raise TypeError, "sequence indices must be integers"
|
||||
len_self = len(self)
|
||||
if index < 0:
|
||||
# support negative indexes
|
||||
index = len_self + index
|
||||
if index < 0 or index >= len_self:
|
||||
raise IndexError, "sequence index out of range"
|
||||
return self.getFunction(index)
|
||||
|
||||
def RC4_encrypt(key, plaintext):
|
||||
S = [i for i in range(256)]
|
||||
j = 0
|
||||
for i in range(256):
|
||||
j = (j + S[i] + ord(key[i % len(key)])) % 256
|
||||
S[i], S[j] = S[j], S[i]
|
||||
i, j = 0, 0
|
||||
retval = ""
|
||||
for x in range(len(plaintext)):
|
||||
i = (i + 1) % 256
|
||||
j = (j + S[i]) % 256
|
||||
S[i], S[j] = S[j], S[i]
|
||||
t = S[(S[i] + S[j]) % 256]
|
||||
retval += chr(ord(plaintext[x]) ^ t)
|
||||
return retval
|
||||
|
||||
class PdfReadError(Exception):
|
||||
pass
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test RC4
|
||||
out = RC4_encrypt("Key", "Plaintext")
|
||||
print repr(out)
|
||||
pt = RC4_encrypt("Key", out)
|
||||
print repr(pt)
|
||||
# vim: sw=4:expandtab:foldmethod=marker
|
||||
#
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
"""
|
||||
Utility functions for PDF library.
|
||||
"""
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
#ENABLE_PSYCO = False
|
||||
#if ENABLE_PSYCO:
|
||||
# try:
|
||||
# import psyco
|
||||
# except ImportError:
|
||||
# ENABLE_PSYCO = False
|
||||
#
|
||||
#if not ENABLE_PSYCO:
|
||||
# class psyco:
|
||||
# def proxy(func):
|
||||
# return func
|
||||
# proxy = staticmethod(proxy)
|
||||
|
||||
def readUntilWhitespace(stream, maxchars=None):
|
||||
txt = ""
|
||||
while True:
|
||||
tok = stream.read(1)
|
||||
if tok.isspace() or not tok:
|
||||
break
|
||||
txt += tok
|
||||
if len(txt) == maxchars:
|
||||
break
|
||||
return txt
|
||||
|
||||
def readNonWhitespace(stream):
|
||||
tok = ' '
|
||||
while tok == '\n' or tok == '\r' or tok == ' ' or tok == '\t':
|
||||
tok = stream.read(1)
|
||||
return tok
|
||||
|
||||
class ConvertFunctionsToVirtualList(object):
|
||||
def __init__(self, lengthFunction, getFunction):
|
||||
self.lengthFunction = lengthFunction
|
||||
self.getFunction = getFunction
|
||||
|
||||
def __len__(self):
|
||||
return self.lengthFunction()
|
||||
|
||||
def __getitem__(self, index):
|
||||
if not isinstance(index, int):
|
||||
raise TypeError, "sequence indices must be integers"
|
||||
len_self = len(self)
|
||||
if index < 0:
|
||||
# support negative indexes
|
||||
index = len_self + index
|
||||
if index < 0 or index >= len_self:
|
||||
raise IndexError, "sequence index out of range"
|
||||
return self.getFunction(index)
|
||||
|
||||
def RC4_encrypt(key, plaintext):
|
||||
S = [i for i in range(256)]
|
||||
j = 0
|
||||
for i in range(256):
|
||||
j = (j + S[i] + ord(key[i % len(key)])) % 256
|
||||
S[i], S[j] = S[j], S[i]
|
||||
i, j = 0, 0
|
||||
retval = ""
|
||||
for x in range(len(plaintext)):
|
||||
i = (i + 1) % 256
|
||||
j = (j + S[i]) % 256
|
||||
S[i], S[j] = S[j], S[i]
|
||||
t = S[(S[i] + S[j]) % 256]
|
||||
retval += chr(ord(plaintext[x]) ^ t)
|
||||
return retval
|
||||
|
||||
def matrixMultiply(a, b):
|
||||
return [[sum([float(i)*float(j)
|
||||
for i, j in zip(row, col)]
|
||||
) for col in zip(*b)]
|
||||
for row in a]
|
||||
|
||||
class PyPdfError(Exception):
|
||||
pass
|
||||
|
||||
class PdfReadError(PyPdfError):
|
||||
pass
|
||||
|
||||
class PageSizeNotDefinedError(PyPdfError):
|
||||
pass
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test RC4
|
||||
out = RC4_encrypt("Key", "Plaintext")
|
||||
print repr(out)
|
||||
pt = RC4_encrypt("Key", out)
|
||||
print repr(pt)
|
||||
|
|
|
@ -1,356 +1,355 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import datetime
|
||||
import decimal
|
||||
from generic import PdfObject
|
||||
from xml.dom import getDOMImplementation
|
||||
from xml.dom.minidom import parseString
|
||||
|
||||
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
|
||||
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
|
||||
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
|
||||
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
|
||||
|
||||
# What is the PDFX namespace, you might ask? I might ask that too. It's
|
||||
# a completely undocumented namespace used to place "custom metadata"
|
||||
# properties, which are arbitrary metadata properties with no semantic or
|
||||
# documented meaning. Elements in the namespace are key/value-style storage,
|
||||
# where the element name is the key and the content is the value. The keys
|
||||
# are transformed into valid XML identifiers by substituting an invalid
|
||||
# identifier character with \u2182 followed by the unicode hex ID of the
|
||||
# original character. A key like "my car" is therefore "my\u21820020car".
|
||||
#
|
||||
# \u2182, in case you're wondering, is the unicode character
|
||||
# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
|
||||
# escaping characters.
|
||||
#
|
||||
# Intentional users of the pdfx namespace should be shot on sight. A
|
||||
# custom data schema and sensical XML elements could be used instead, as is
|
||||
# suggested by Adobe's own documentation on XMP (under "Extensibility of
|
||||
# Schemas").
|
||||
#
|
||||
# Information presented here on the /pdfx/ schema is a result of limited
|
||||
# reverse engineering, and does not constitute a full specification.
|
||||
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
|
||||
|
||||
iso8601 = re.compile("""
|
||||
(?P<year>[0-9]{4})
|
||||
(-
|
||||
(?P<month>[0-9]{2})
|
||||
(-
|
||||
(?P<day>[0-9]+)
|
||||
(T
|
||||
(?P<hour>[0-9]{2}):
|
||||
(?P<minute>[0-9]{2})
|
||||
(:(?P<second>[0-9]{2}(.[0-9]+)?))?
|
||||
(?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
|
||||
)?
|
||||
)?
|
||||
)?
|
||||
""", re.VERBOSE)
|
||||
|
||||
##
|
||||
# An object that represents Adobe XMP metadata.
|
||||
class XmpInformation(PdfObject):
|
||||
|
||||
def __init__(self, stream):
|
||||
self.stream = stream
|
||||
docRoot = parseString(self.stream.getData())
|
||||
self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
|
||||
self.cache = {}
|
||||
|
||||
def writeToStream(self, stream, encryption_key):
|
||||
self.stream.writeToStream(stream, encryption_key)
|
||||
|
||||
def getElement(self, aboutUri, namespace, name):
|
||||
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
|
||||
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
|
||||
attr = desc.getAttributeNodeNS(namespace, name)
|
||||
if attr != None:
|
||||
yield attr
|
||||
for element in desc.getElementsByTagNameNS(namespace, name):
|
||||
yield element
|
||||
|
||||
def getNodesInNamespace(self, aboutUri, namespace):
|
||||
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
|
||||
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
|
||||
for i in range(desc.attributes.length):
|
||||
attr = desc.attributes.item(i)
|
||||
if attr.namespaceURI == namespace:
|
||||
yield attr
|
||||
for child in desc.childNodes:
|
||||
if child.namespaceURI == namespace:
|
||||
yield child
|
||||
|
||||
def _getText(self, element):
|
||||
text = ""
|
||||
for child in element.childNodes:
|
||||
if child.nodeType == child.TEXT_NODE:
|
||||
text += child.data
|
||||
return text
|
||||
|
||||
def _converter_string(value):
|
||||
return value
|
||||
|
||||
def _converter_date(value):
|
||||
m = iso8601.match(value)
|
||||
year = int(m.group("year"))
|
||||
month = int(m.group("month") or "1")
|
||||
day = int(m.group("day") or "1")
|
||||
hour = int(m.group("hour") or "0")
|
||||
minute = int(m.group("minute") or "0")
|
||||
second = decimal.Decimal(m.group("second") or "0")
|
||||
seconds = second.to_integral(decimal.ROUND_FLOOR)
|
||||
milliseconds = (second - seconds) * 1000000
|
||||
tzd = m.group("tzd") or "Z"
|
||||
dt = datetime.datetime(year, month, day, hour, minute, seconds, milliseconds)
|
||||
if tzd != "Z":
|
||||
tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
|
||||
tzd_hours *= -1
|
||||
if tzd_hours < 0:
|
||||
tzd_minutes *= -1
|
||||
dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
|
||||
return dt
|
||||
_test_converter_date = staticmethod(_converter_date)
|
||||
|
||||
def _getter_bag(namespace, name, converter):
|
||||
def get(self):
|
||||
cached = self.cache.get(namespace, {}).get(name)
|
||||
if cached:
|
||||
return cached
|
||||
retval = []
|
||||
for element in self.getElement("", namespace, name):
|
||||
bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
|
||||
if len(bags):
|
||||
for bag in bags:
|
||||
for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
|
||||
value = self._getText(item)
|
||||
value = converter(value)
|
||||
retval.append(value)
|
||||
ns_cache = self.cache.setdefault(namespace, {})
|
||||
ns_cache[name] = retval
|
||||
return retval
|
||||
return get
|
||||
|
||||
def _getter_seq(namespace, name, converter):
|
||||
def get(self):
|
||||
cached = self.cache.get(namespace, {}).get(name)
|
||||
if cached:
|
||||
return cached
|
||||
retval = []
|
||||
for element in self.getElement("", namespace, name):
|
||||
seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
|
||||
if len(seqs):
|
||||
for seq in seqs:
|
||||
for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
|
||||
value = self._getText(item)
|
||||
value = converter(value)
|
||||
retval.append(value)
|
||||
else:
|
||||
value = converter(self._getText(element))
|
||||
retval.append(value)
|
||||
ns_cache = self.cache.setdefault(namespace, {})
|
||||
ns_cache[name] = retval
|
||||
return retval
|
||||
return get
|
||||
|
||||
def _getter_langalt(namespace, name, converter):
|
||||
def get(self):
|
||||
cached = self.cache.get(namespace, {}).get(name)
|
||||
if cached:
|
||||
return cached
|
||||
retval = {}
|
||||
for element in self.getElement("", namespace, name):
|
||||
alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
|
||||
if len(alts):
|
||||
for alt in alts:
|
||||
for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
|
||||
value = self._getText(item)
|
||||
value = converter(value)
|
||||
retval[item.getAttribute("xml:lang")] = value
|
||||
else:
|
||||
retval["x-default"] = converter(self._getText(element))
|
||||
ns_cache = self.cache.setdefault(namespace, {})
|
||||
ns_cache[name] = retval
|
||||
return retval
|
||||
return get
|
||||
|
||||
def _getter_single(namespace, name, converter):
|
||||
def get(self):
|
||||
cached = self.cache.get(namespace, {}).get(name)
|
||||
if cached:
|
||||
return cached
|
||||
value = None
|
||||
for element in self.getElement("", namespace, name):
|
||||
if element.nodeType == element.ATTRIBUTE_NODE:
|
||||
value = element.nodeValue
|
||||
else:
|
||||
value = self._getText(element)
|
||||
break
|
||||
if value != None:
|
||||
value = converter(value)
|
||||
ns_cache = self.cache.setdefault(namespace, {})
|
||||
ns_cache[name] = value
|
||||
return value
|
||||
return get
|
||||
|
||||
##
# Contributors to the resource (other than the authors). An unsorted
# array of names.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))

##
# Text describing the extent or scope of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))

##
# A sorted array of names of the authors of the resource, listed in order
# of precedence.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))

##
# A sorted array of dates (datetime.datetime instances) of significance to
# the resource.  The dates and times are in UTC.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))

##
# A language-keyed dictionary of textual descriptions of the content of the
# resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))

##
# The mime-type of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))

##
# Unique identifier of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))

##
# An unordered array specifying the languages used in the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))

##
# An unordered array of publisher names.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))

##
# An unordered array of text descriptions of relationships to other
# documents.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))

##
# A language-keyed dictionary of textual descriptions of the rights the
# user has to this resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))

##
# Unique identifier of the work from which this resource was derived.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))

##
# An unordered array of descriptive phrases or keywords that specify the
# topic of the content of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))

##
# A language-keyed dictionary of the title of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))

##
# An unordered array of textual descriptions of the document type.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))

##
# An unformatted text string representing document keywords.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))

##
# The PDF file version, for example 1.0, 1.3.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))

##
# The name of the tool that created the PDF document.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))

##
# The date and time the resource was originally created.  The date and
# time are returned as a UTC datetime.datetime object.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))

##
# The date and time the resource was last modified.  The date and time
# are returned as a UTC datetime.datetime object.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))

##
# The date and time that any metadata for this resource was last
# changed.  The date and time are returned as a UTC datetime.datetime
# object.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))

##
# The name of the first known tool used to create the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))

##
# The common identifier for all versions and renditions of this resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))

##
# An identifier for a specific incarnation of a document, updated each
# time a file is saved.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
|
||||
|
||||
def custom_properties(self):
    """Return the dictionary of pdfx "custom metadata" key/value pairs.

    Keys are un-escaped from the \\u2182-encoded form used in the pdfx
    namespace (see the PDFX_NAMESPACE documentation earlier in this file);
    values are the raw attribute value or element text content.  The result
    is computed once and memoized on ``self._custom_properties``.
    """
    if not hasattr(self, "_custom_properties"):
        self._custom_properties = {}
        for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
            key = node.localName
            while True:
                # see documentation about PDFX_NAMESPACE earlier in file
                idx = key.find(u"\u2182")
                if idx == -1:
                    break
                # Replace "\u2182XXXX" with the character whose hex code
                # point is XXXX.
                # NOTE(review): chr() only covers code points < 256 on
                # Python 2; unichr() may be needed for non-Latin-1
                # escapes -- confirm the target Python version.
                key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
            if node.nodeType == node.ATTRIBUTE_NODE:
                value = node.nodeValue
            else:
                value = self._getText(node)
            self._custom_properties[key] = value
    return self._custom_properties

##
# Retrieves custom metadata properties defined in the undocumented pdfx
# metadata schema.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
# @return Returns a dictionary of key/value items for custom metadata
# properties.
custom_properties = property(custom_properties)
|
||||
|
||||
|
||||
import re
|
||||
import datetime
|
||||
import decimal
|
||||
from generic import PdfObject
|
||||
from xml.dom import getDOMImplementation
|
||||
from xml.dom.minidom import parseString
|
||||
|
||||
# XML namespace URIs used when reading XMP metadata out of a PDF.
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"      # Dublin Core
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"         # XMP basic schema
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"         # Adobe PDF schema
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"    # XMP Media Management

# What is the PDFX namespace, you might ask?  I might ask that too.  It's
# a completely undocumented namespace used to place "custom metadata"
# properties, which are arbitrary metadata properties with no semantic or
# documented meaning.  Elements in the namespace are key/value-style storage,
# where the element name is the key and the content is the value.  The keys
# are transformed into valid XML identifiers by substituting an invalid
# identifier character with \u2182 followed by the unicode hex ID of the
# original character.  A key like "my car" is therefore "my\u21820020car".
#
# \u2182, in case you're wondering, is the unicode character
# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
# escaping characters.
#
# Intentional users of the pdfx namespace should be shot on sight.  A
# custom data schema and sensical XML elements could be used instead, as is
# suggested by Adobe's own documentation on XMP (under "Extensibility of
# Schemas").
#
# Information presented here on the /pdfx/ schema is a result of limited
# reverse engineering, and does not constitute a full specification.
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
|
||||
|
||||
# Regular expression for the ISO 8601 date/time profile used by XMP:
# YYYY[-MM[-DD[THH:MM[:SS[.ff]](Z|+HH:MM|-HH:MM)]]].  Month, day and time
# components are optional and nest, so a bare year is a valid match.
# Raw string so the escaped decimal point reaches the regex engine intact;
# the dot before the fractional seconds was previously unescaped and
# matched ANY character (e.g. "12x25" parsed as a "second" and later blew
# up decimal.Decimal).
iso8601 = re.compile(r"""
    (?P<year>[0-9]{4})
    (-
        (?P<month>[0-9]{2})
        (-
            (?P<day>[0-9]+)
            (T
                (?P<hour>[0-9]{2}):
                (?P<minute>[0-9]{2})
                (:(?P<second>[0-9]{2}(\.[0-9]+)?))?
                (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
            )?
        )?
    )?
    """, re.VERBOSE)
|
||||
|
||||
##
# An object that represents Adobe XMP metadata.
class XmpInformation(PdfObject):
    """Read-only accessor for the Adobe XMP metadata embedded in a PDF.

    Wraps the XMP metadata stream of a PDF document and exposes the
    standard Dublin Core, XMP basic, Adobe PDF and XMP Media Management
    properties as lazily-computed, cached Python attributes.
    """

    def __init__(self, stream):
        # stream: the PDF stream object holding the raw XMP XML packet.
        self.stream = stream
        docRoot = parseString(self.stream.getData())
        # The single rdf:RDF element that contains all metadata.
        self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
        # Two-level cache: {namespace: {property-name: converted value}}.
        self.cache = {}

    def writeToStream(self, stream, encryption_key):
        """Serialize the underlying XMP stream unchanged."""
        self.stream.writeToStream(stream, encryption_key)

    def getElement(self, aboutUri, namespace, name):
        """Yield every attribute node and element matching (namespace, name)
        inside rdf:Description elements whose rdf:about equals aboutUri."""
        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
                attr = desc.getAttributeNodeNS(namespace, name)
                if attr != None:
                    yield attr
                for element in desc.getElementsByTagNameNS(namespace, name):
                    yield element

    def getNodesInNamespace(self, aboutUri, namespace):
        """Yield every attribute node and child element of the matching
        rdf:Description elements that lives in the given namespace."""
        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
                for i in range(desc.attributes.length):
                    attr = desc.attributes.item(i)
                    if attr.namespaceURI == namespace:
                        yield attr
                for child in desc.childNodes:
                    if child.namespaceURI == namespace:
                        yield child

    def _getText(self, element):
        """Concatenate the direct text-node children of *element*."""
        text = ""
        for child in element.childNodes:
            if child.nodeType == child.TEXT_NODE:
                text += child.data
        return text

    # Identity converter for plain string properties.  Note: these
    # _converter_* / _getter_* helpers are plain functions evaluated at
    # class-definition time to build the property objects below; they are
    # not instance methods.
    def _converter_string(value):
        return value

    def _converter_date(value):
        """Parse an ISO 8601 (XMP profile) date string into a naive
        datetime.datetime normalized to UTC."""
        m = iso8601.match(value)
        year = int(m.group("year"))
        month = int(m.group("month") or "1")
        day = int(m.group("day") or "1")
        hour = int(m.group("hour") or "0")
        minute = int(m.group("minute") or "0")
        second = decimal.Decimal(m.group("second") or "0")
        seconds = second.to_integral(decimal.ROUND_FLOOR)
        # Despite the name, this value is passed as datetime's microsecond
        # argument (the fractional part scaled by 1e6).
        milliseconds = (second - seconds) * 1000000
        tzd = m.group("tzd") or "Z"
        # NOTE(review): seconds/milliseconds are Decimal instances here;
        # datetime accepts them on Python 2 but not on Python 3 -- confirm
        # the target interpreter.
        dt = datetime.datetime(year, month, day, hour, minute, seconds, milliseconds)
        if tzd != "Z":
            # Negate the offset so adding the timedelta converts the local
            # timestamp to UTC; the sign of the minutes must follow the
            # sign of the hours.
            tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
            tzd_hours *= -1
            if tzd_hours < 0:
                tzd_minutes *= -1
            dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
        return dt
    # Expose the date converter for the test suite.
    _test_converter_date = staticmethod(_converter_date)

    def _getter_bag(namespace, name, converter):
        """Build a property getter for an rdf:Bag (unordered array) value."""
        def get(self):
            # NOTE(review): an empty cached list is falsy and is therefore
            # recomputed on every access -- harmless but wasteful.
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
                if len(bags):
                    for bag in bags:
                        for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_seq(namespace, name, converter):
        """Build a property getter for an rdf:Seq (ordered array) value."""
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
                if len(seqs):
                    for seq in seqs:
                        for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
                else:
                    # No rdf:Seq wrapper -- treat the element text as a
                    # single-item sequence.
                    value = converter(self._getText(element))
                    retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_langalt(namespace, name, converter):
        """Build a property getter for an rdf:Alt (language-alternative)
        value, returned as a dict keyed by xml:lang."""
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = {}
            for element in self.getElement("", namespace, name):
                alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
                if len(alts):
                    for alt in alts:
                        for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval[item.getAttribute("xml:lang")] = value
                else:
                    # No rdf:Alt wrapper -- expose the text under the
                    # default-language key.
                    retval["x-default"] = converter(self._getText(element))
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_single(namespace, name, converter):
        """Build a property getter for a simple single-valued property;
        returns None when the property is absent."""
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            value = None
            for element in self.getElement("", namespace, name):
                if element.nodeType == element.ATTRIBUTE_NODE:
                    value = element.nodeValue
                else:
                    value = self._getText(element)
                # Only the first match is significant.
                break
            if value != None:
                value = converter(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = value
            return value
        return get

    ##
    # Contributors to the resource (other than the authors). An unsorted
    # array of names.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))

    ##
    # Text describing the extent or scope of the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))

    ##
    # A sorted array of names of the authors of the resource, listed in order
    # of precedence.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))

    ##
    # A sorted array of dates (datetime.datetime instances) of significance to
    # the resource.  The dates and times are in UTC.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))

    ##
    # A language-keyed dictionary of textual descriptions of the content of the
    # resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))

    ##
    # The mime-type of the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))

    ##
    # Unique identifier of the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))

    ##
    # An unordered array specifying the languages used in the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))

    ##
    # An unordered array of publisher names.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))

    ##
    # An unordered array of text descriptions of relationships to other
    # documents.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))

    ##
    # A language-keyed dictionary of textual descriptions of the rights the
    # user has to this resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))

    ##
    # Unique identifier of the work from which this resource was derived.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))

    ##
    # An unordered array of descriptive phrases or keywords that specify the
    # topic of the content of the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))

    ##
    # A language-keyed dictionary of the title of the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))

    ##
    # An unordered array of textual descriptions of the document type.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))

    ##
    # An unformatted text string representing document keywords.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))

    ##
    # The PDF file version, for example 1.0, 1.3.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))

    ##
    # The name of the tool that created the PDF document.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))

    ##
    # The date and time the resource was originally created.  The date and
    # time are returned as a UTC datetime.datetime object.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))

    ##
    # The date and time the resource was last modified.  The date and time
    # are returned as a UTC datetime.datetime object.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))

    ##
    # The date and time that any metadata for this resource was last
    # changed.  The date and time are returned as a UTC datetime.datetime
    # object.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))

    ##
    # The name of the first known tool used to create the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))

    ##
    # The common identifier for all versions and renditions of this resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))

    ##
    # An identifier for a specific incarnation of a document, updated each
    # time a file is saved.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))

    def custom_properties(self):
        """Return the memoized dictionary of pdfx custom metadata pairs,
        un-escaping \\u2182-encoded key characters (see PDFX_NAMESPACE)."""
        if not hasattr(self, "_custom_properties"):
            self._custom_properties = {}
            for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
                key = node.localName
                while True:
                    # see documentation about PDFX_NAMESPACE earlier in file
                    idx = key.find(u"\u2182")
                    if idx == -1:
                        break
                    # NOTE(review): chr() only covers code points < 256 on
                    # Python 2; unichr() may be needed for non-Latin-1
                    # escapes -- confirm the target Python version.
                    key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
                if node.nodeType == node.ATTRIBUTE_NODE:
                    value = node.nodeValue
                else:
                    value = self._getText(node)
                self._custom_properties[key] = value
        return self._custom_properties

    ##
    # Retrieves custom metadata properties defined in the undocumented pdfx
    # metadata schema.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    # @return Returns a dictionary of key/value items for custom metadata
    # properties.
    custom_properties = property(custom_properties)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue