[REM] report: remove openerp/report/pyPdf

Distributing our version of pyPdf is not required anymore since we
introduced the new reporting tool.

In order to keep reports working, the standard python library
`python-pypdf` has to be installed instead (through pip or your
distribution's package manager)
This commit is contained in:
ssh-odoo 2014-09-05 10:23:58 +05:30 committed by Richard Mathot
parent 0b52cf1996
commit acdb43b6cb
6 changed files with 0 additions and 3400 deletions

View File

@ -1,6 +0,0 @@
from pdf import PdfFileReader, PdfFileWriter
__all__ = ["pdf"]
# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4:

View File

@ -1,252 +0,0 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Implementation of stream filters for PDF.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
from utils import PdfReadError
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
try:
import zlib
def decompress(data):
return zlib.decompress(data)
def compress(data):
return zlib.compress(data)
except ImportError:
# Unable to import zlib. Attempt to use the System.IO.Compression
# library from the .NET framework. (IronPython only)
import System
from System import IO, Collections, Array
def _string_to_bytearr(buf):
retval = Array.CreateInstance(System.Byte, len(buf))
for i in range(len(buf)):
retval[i] = ord(buf[i])
return retval
def _bytearr_to_string(bytes):
retval = ""
for i in range(bytes.Length):
retval += chr(bytes[i])
return retval
def _read_bytes(stream):
ms = IO.MemoryStream()
buf = Array.CreateInstance(System.Byte, 2048)
while True:
bytes = stream.Read(buf, 0, buf.Length)
if bytes == 0:
break
else:
ms.Write(buf, 0, bytes)
retval = ms.ToArray()
ms.Close()
return retval
def decompress(data):
bytes = _string_to_bytearr(data)
ms = IO.MemoryStream()
ms.Write(bytes, 0, bytes.Length)
ms.Position = 0 # fseek 0
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
bytes = _read_bytes(gz)
retval = _bytearr_to_string(bytes)
gz.Close()
return retval
def compress(data):
bytes = _string_to_bytearr(data)
ms = IO.MemoryStream()
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
gz.Write(bytes, 0, bytes.Length)
gz.Close()
ms.Position = 0 # fseek 0
bytes = ms.ToArray()
retval = _bytearr_to_string(bytes)
ms.Close()
return retval
class FlateDecode(object):
def decode(data, decodeParms):
data = decompress(data)
predictor = 1
if decodeParms:
predictor = decodeParms.get("/Predictor", 1)
# predictor 1 == no predictor
if predictor != 1:
columns = decodeParms["/Columns"]
# PNG prediction:
if 10 <= predictor <= 15:
output = StringIO()
# PNG prediction can vary from row to row
rowlength = columns + 1
assert len(data) % rowlength == 0
prev_rowdata = (0,) * rowlength
for row in xrange(len(data) / rowlength):
rowdata = [ord(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
filterByte = rowdata[0]
if filterByte == 0:
pass
elif filterByte == 1:
for i in range(2, rowlength):
rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
elif filterByte == 2:
for i in range(1, rowlength):
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
else:
# unsupported PNG filter
raise PdfReadError("Unsupported PNG filter %r" % filterByte)
prev_rowdata = rowdata
output.write(''.join([chr(x) for x in rowdata[1:]]))
data = output.getvalue()
else:
# unsupported predictor
raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
return data
decode = staticmethod(decode)
def encode(data):
return compress(data)
encode = staticmethod(encode)
class ASCIIHexDecode(object):
def decode(data, decodeParms=None):
retval = ""
char = ""
x = 0
while True:
c = data[x]
if c == ">":
break
elif c.isspace():
x += 1
continue
char += c
if len(char) == 2:
retval += chr(int(char, base=16))
char = ""
x += 1
assert char == ""
return retval
decode = staticmethod(decode)
class ASCII85Decode(object):
def decode(data, decodeParms=None):
retval = ""
group = []
x = 0
hitEod = False
# remove all whitespace from data
data = [y for y in data if not (y in ' \n\r\t')]
while not hitEod:
c = data[x]
if len(retval) == 0 and c == "<" and data[x+1] == "~":
x += 2
continue
#elif c.isspace():
# x += 1
# continue
elif c == 'z':
assert len(group) == 0
retval += '\x00\x00\x00\x00'
continue
elif c == "~" and data[x+1] == ">":
if len(group) != 0:
# cannot have a final group of just 1 char
assert len(group) > 1
cnt = len(group) - 1
group += [ 85, 85, 85 ]
hitEod = cnt
else:
break
else:
c = ord(c) - 33
assert 0 <= c < 85
group += [ c ]
if len(group) >= 5:
b = group[0] * (85**4) + \
group[1] * (85**3) + \
group[2] * (85**2) + \
group[3] * 85 + \
group[4]
assert b < (2**32 - 1)
c4 = chr((b >> 0) % 256)
c3 = chr((b >> 8) % 256)
c2 = chr((b >> 16) % 256)
c1 = chr(b >> 24)
retval += (c1 + c2 + c3 + c4)
if hitEod:
retval = retval[:-4+hitEod]
group = []
x += 1
return retval
decode = staticmethod(decode)
def decodeStreamData(stream):
from generic import NameObject
filters = stream.get("/Filter", ())
if len(filters) and not isinstance(filters[0], NameObject):
# we have a single filter instance
filters = (filters,)
data = stream._data
for filterType in filters:
if filterType == "/FlateDecode":
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
elif filterType == "/ASCIIHexDecode":
data = ASCIIHexDecode.decode(data)
elif filterType == "/ASCII85Decode":
data = ASCII85Decode.decode(data)
elif filterType == "/Crypt":
decodeParams = stream.get("/DecodeParams", {})
if "/Name" not in decodeParams and "/Type" not in decodeParams:
pass
else:
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
else:
# unsupported filter
raise NotImplementedError("unsupported filter %s" % filterType)
return data
if __name__ == "__main__":
assert "abc" == ASCIIHexDecode.decode('61\n626\n3>')
ascii85Test = """
<~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY
i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>
"""
ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText

View File

@ -1,800 +0,0 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Implementation of generic PDF objects (dictionary, number, string, and so on)
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
import re
from utils import readNonWhitespace, RC4_encrypt
import filters
import utils
import decimal
import codecs
def readObject(stream, pdf):
tok = stream.read(1)
stream.seek(-1, 1) # reset to start
if tok == 't' or tok == 'f':
# boolean object
return BooleanObject.readFromStream(stream)
elif tok == '(':
# string object
return readStringFromStream(stream)
elif tok == '/':
# name object
return NameObject.readFromStream(stream)
elif tok == '[':
# array object
return ArrayObject.readFromStream(stream, pdf)
elif tok == 'n':
# null object
return NullObject.readFromStream(stream)
elif tok == '<':
# hexadecimal string OR dictionary
peek = stream.read(2)
stream.seek(-2, 1) # reset to start
if peek == '<<':
return DictionaryObject.readFromStream(stream, pdf)
else:
return readHexStringFromStream(stream)
elif tok == '%':
# comment
while tok not in ('\r', '\n'):
tok = stream.read(1)
tok = readNonWhitespace(stream)
stream.seek(-1, 1)
return readObject(stream, pdf)
else:
# number object OR indirect reference
if tok == '+' or tok == '-':
# number
return NumberObject.readFromStream(stream)
peek = stream.read(20)
stream.seek(-len(peek), 1) # reset to start
if re.match(r"(\d+)\s(\d+)\sR[^a-zA-Z]", peek) is not None:
return IndirectObject.readFromStream(stream, pdf)
else:
return NumberObject.readFromStream(stream)
class PdfObject(object):
def getObject(self):
"""Resolves indirect references."""
return self
class NullObject(PdfObject):
def writeToStream(self, stream, encryption_key):
stream.write("null")
def readFromStream(stream):
nulltxt = stream.read(4)
if nulltxt != "null":
raise utils.PdfReadError, "error reading null object"
return NullObject()
readFromStream = staticmethod(readFromStream)
class BooleanObject(PdfObject):
def __init__(self, value):
self.value = value
def writeToStream(self, stream, encryption_key):
if self.value:
stream.write("true")
else:
stream.write("false")
def readFromStream(stream):
word = stream.read(4)
if word == "true":
return BooleanObject(True)
elif word == "fals":
stream.read(1)
return BooleanObject(False)
assert False
readFromStream = staticmethod(readFromStream)
class ArrayObject(list, PdfObject):
def writeToStream(self, stream, encryption_key):
stream.write("[")
for data in self:
stream.write(" ")
data.writeToStream(stream, encryption_key)
stream.write(" ]")
def readFromStream(stream, pdf):
arr = ArrayObject()
tmp = stream.read(1)
if tmp != "[":
raise utils.PdfReadError, "error reading array"
while True:
# skip leading whitespace
tok = stream.read(1)
while tok.isspace():
tok = stream.read(1)
stream.seek(-1, 1)
# check for array ending
peekahead = stream.read(1)
if peekahead == "]":
break
stream.seek(-1, 1)
# read and append obj
arr.append(readObject(stream, pdf))
return arr
readFromStream = staticmethod(readFromStream)
class IndirectObject(PdfObject):
def __init__(self, idnum, generation, pdf):
self.idnum = idnum
self.generation = generation
self.pdf = pdf
def getObject(self):
return self.pdf.getObject(self).getObject()
def __repr__(self):
return "IndirectObject(%r, %r)" % (self.idnum, self.generation)
def __eq__(self, other):
return (
other is not None and
isinstance(other, IndirectObject) and
self.idnum == other.idnum and
self.generation == other.generation and
self.pdf is other.pdf
)
def __ne__(self, other):
return not self.__eq__(other)
def writeToStream(self, stream, encryption_key):
stream.write("%s %s R" % (self.idnum, self.generation))
def readFromStream(stream, pdf):
idnum = ""
while True:
tok = stream.read(1)
if tok.isspace():
break
idnum += tok
generation = ""
while True:
tok = stream.read(1)
if tok.isspace():
break
generation += tok
r = stream.read(1)
if r != "R":
raise utils.PdfReadError("error reading indirect object reference")
return IndirectObject(int(idnum), int(generation), pdf)
readFromStream = staticmethod(readFromStream)
class FloatObject(decimal.Decimal, PdfObject):
def __new__(cls, value="0", context=None):
return decimal.Decimal.__new__(cls, str(value), context)
def __repr__(self):
if self == self.to_integral():
return str(self.quantize(decimal.Decimal(1)))
else:
# XXX: this adds useless extraneous zeros.
return "%.5f" % self
def writeToStream(self, stream, encryption_key):
stream.write(repr(self))
class NumberObject(int, PdfObject):
def __init__(self, value):
int.__init__(value)
def writeToStream(self, stream, encryption_key):
stream.write(repr(self))
def readFromStream(stream):
name = ""
while True:
tok = stream.read(1)
if tok != '+' and tok != '-' and tok != '.' and not tok.isdigit():
stream.seek(-1, 1)
break
name += tok
if name.find(".") != -1:
return FloatObject(name)
else:
return NumberObject(name)
readFromStream = staticmethod(readFromStream)
##
# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
# TextStringObject to represent the string.
def createStringObject(string):
if isinstance(string, unicode):
return TextStringObject(string)
elif isinstance(string, str):
if string.startswith(codecs.BOM_UTF16_BE):
retval = TextStringObject(string.decode("utf-16"))
retval.autodetect_utf16 = True
return retval
else:
# This is probably a big performance hit here, but we need to
# convert string objects into the text/unicode-aware version if
# possible... and the only way to check if that's possible is
# to try. Some strings are strings, some are just byte arrays.
try:
retval = TextStringObject(decode_pdfdocencoding(string))
retval.autodetect_pdfdocencoding = True
return retval
except UnicodeDecodeError:
return ByteStringObject(string)
else:
raise TypeError("createStringObject should have str or unicode arg")
def readHexStringFromStream(stream):
stream.read(1)
txt = ""
x = ""
while True:
tok = readNonWhitespace(stream)
if tok == ">":
break
x += tok
if len(x) == 2:
txt += chr(int(x, base=16))
x = ""
if len(x) == 1:
x += "0"
if len(x) == 2:
txt += chr(int(x, base=16))
return createStringObject(txt)
def readStringFromStream(stream):
tok = stream.read(1)
parens = 1
txt = ""
while True:
tok = stream.read(1)
if tok == "(":
parens += 1
elif tok == ")":
parens -= 1
if parens == 0:
break
elif tok == "\\":
tok = stream.read(1)
if tok == "n":
tok = "\n"
elif tok == "r":
tok = "\r"
elif tok == "t":
tok = "\t"
elif tok == "b":
tok = "\b"
elif tok == "f":
tok = "\f"
elif tok == "(":
tok = "("
elif tok == ")":
tok = ")"
elif tok == "\\":
tok = "\\"
elif tok.isdigit():
# "The number ddd may consist of one, two, or three
# octal digits; high-order overflow shall be ignored.
# Three octal digits shall be used, with leading zeros
# as needed, if the next character of the string is also
# a digit." (PDF reference 7.3.4.2, p 16)
for i in range(2):
ntok = stream.read(1)
if ntok.isdigit():
tok += ntok
else:
break
tok = chr(int(tok, base=8))
elif tok in "\n\r":
# This case is hit when a backslash followed by a line
# break occurs. If it's a multi-char EOL, consume the
# second character:
tok = stream.read(1)
if not tok in "\n\r":
stream.seek(-1, 1)
# Then don't add anything to the actual string, since this
# line break was escaped:
tok = ''
else:
raise utils.PdfReadError("Unexpected escaped string")
txt += tok
return createStringObject(txt)
##
# Represents a string object where the text encoding could not be determined.
# This occurs quite often, as the PDF spec doesn't provide an alternate way to
# represent strings -- for example, the encryption data stored in files (like
# /O) is clearly not text, but is still stored in a "String" object.
class ByteStringObject(str, PdfObject):
##
# For compatibility with TextStringObject.original_bytes. This method
# returns self.
original_bytes = property(lambda self: self)
def writeToStream(self, stream, encryption_key):
bytearr = self
if encryption_key:
bytearr = RC4_encrypt(encryption_key, bytearr)
stream.write("<")
stream.write(bytearr.encode("hex"))
stream.write(">")
##
# Represents a string object that has been decoded into a real unicode string.
# If read from a PDF document, this string appeared to match the
# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
# occur.
class TextStringObject(unicode, PdfObject):
autodetect_pdfdocencoding = False
autodetect_utf16 = False
##
# It is occasionally possible that a text string object gets created where
# a byte string object was expected due to the autodetection mechanism --
# if that occurs, this "original_bytes" property can be used to
# back-calculate what the original encoded bytes were.
original_bytes = property(lambda self: self.get_original_bytes())
def get_original_bytes(self):
# We're a text string object, but the library is trying to get our raw
# bytes. This can happen if we auto-detected this string as text, but
# we were wrong. It's pretty common. Return the original bytes that
# would have been used to create this object, based upon the autodetect
# method.
if self.autodetect_utf16:
return codecs.BOM_UTF16_BE + self.encode("utf-16be")
elif self.autodetect_pdfdocencoding:
return encode_pdfdocencoding(self)
else:
raise Exception("no information about original bytes")
def writeToStream(self, stream, encryption_key):
# Try to write the string out as a PDFDocEncoding encoded string. It's
# nicer to look at in the PDF file. Sadly, we take a performance hit
# here for trying...
try:
bytearr = encode_pdfdocencoding(self)
except UnicodeEncodeError:
bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
if encryption_key:
bytearr = RC4_encrypt(encryption_key, bytearr)
obj = ByteStringObject(bytearr)
obj.writeToStream(stream, None)
else:
stream.write("(")
for c in bytearr:
if not c.isalnum() and c != ' ':
stream.write("\\%03o" % ord(c))
else:
stream.write(c)
stream.write(")")
class NameObject(str, PdfObject):
delimiterCharacters = "(", ")", "<", ">", "[", "]", "{", "}", "/", "%"
def __init__(self, data):
str.__init__(data)
def writeToStream(self, stream, encryption_key):
stream.write(self)
def readFromStream(stream):
name = stream.read(1)
if name != "/":
raise utils.PdfReadError, "name read error"
while True:
tok = stream.read(1)
if tok.isspace() or tok in NameObject.delimiterCharacters:
stream.seek(-1, 1)
break
name += tok
return NameObject(name)
readFromStream = staticmethod(readFromStream)
class DictionaryObject(dict, PdfObject):
def __init__(self, *args, **kwargs):
if len(args) == 0:
self.update(kwargs)
elif len(args) == 1:
arr = args[0]
# If we're passed a list/tuple, make a dict out of it
if not hasattr(arr, "iteritems"):
newarr = {}
for k, v in arr:
newarr[k] = v
arr = newarr
self.update(arr)
else:
raise TypeError("dict expected at most 1 argument, got 3")
def update(self, arr):
# note, a ValueError halfway through copying values
# will leave half the values in this dict.
for k, v in arr.iteritems():
self.__setitem__(k, v)
def raw_get(self, key):
return dict.__getitem__(self, key)
def __setitem__(self, key, value):
if not isinstance(key, PdfObject):
raise ValueError("key must be PdfObject")
if not isinstance(value, PdfObject):
raise ValueError("value must be PdfObject")
return dict.__setitem__(self, key, value)
def setdefault(self, key, value=None):
if not isinstance(key, PdfObject):
raise ValueError("key must be PdfObject")
if not isinstance(value, PdfObject):
raise ValueError("value must be PdfObject")
return dict.setdefault(self, key, value)
def __getitem__(self, key):
return dict.__getitem__(self, key).getObject()
##
# Retrieves XMP (Extensible Metadata Platform) data relevant to the
# this object, if available.
# <p>
# Stability: Added in v1.12, will exist for all future v1.x releases.
# @return Returns a {@link #xmp.XmpInformation XmlInformation} instance
# that can be used to access XMP metadata from the document. Can also
# return None if no metadata was found on the document root.
def getXmpMetadata(self):
metadata = self.get("/Metadata", None)
if metadata is None:
return None
metadata = metadata.getObject()
import xmp
if not isinstance(metadata, xmp.XmpInformation):
metadata = xmp.XmpInformation(metadata)
self[NameObject("/Metadata")] = metadata
return metadata
##
# Read-only property that accesses the {@link
# #DictionaryObject.getXmpData getXmpData} function.
# <p>
# Stability: Added in v1.12, will exist for all future v1.x releases.
xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
def writeToStream(self, stream, encryption_key):
stream.write("<<\n")
for key, value in self.items():
key.writeToStream(stream, encryption_key)
stream.write(" ")
value.writeToStream(stream, encryption_key)
stream.write("\n")
stream.write(">>")
def readFromStream(stream, pdf):
tmp = stream.read(2)
if tmp != "<<":
raise utils.PdfReadError, "dictionary read error"
data = {}
while True:
tok = readNonWhitespace(stream)
if tok == ">":
stream.read(1)
break
stream.seek(-1, 1)
key = readObject(stream, pdf)
tok = readNonWhitespace(stream)
stream.seek(-1, 1)
value = readObject(stream, pdf)
if data.has_key(key):
# multiple definitions of key not permitted
raise utils.PdfReadError, "multiple definitions in dictionary"
data[key] = value
pos = stream.tell()
s = readNonWhitespace(stream)
if s == 's' and stream.read(5) == 'tream':
eol = stream.read(1)
# odd PDF file output has spaces after 'stream' keyword but before EOL.
# patch provided by Danial Sandler
while eol == ' ':
eol = stream.read(1)
assert eol in ("\n", "\r")
if eol == "\r":
# read \n after
stream.read(1)
# this is a stream object, not a dictionary
assert data.has_key("/Length")
length = data["/Length"]
if isinstance(length, IndirectObject):
t = stream.tell()
length = pdf.getObject(length)
stream.seek(t, 0)
data["__streamdata__"] = stream.read(length)
e = readNonWhitespace(stream)
ndstream = stream.read(8)
if (e + ndstream) != "endstream":
# (sigh) - the odd PDF file has a length that is too long, so
# we need to read backwards to find the "endstream" ending.
# ReportLab (unknown version) generates files with this bug,
# and Python users into PDF files tend to be our audience.
# we need to do this to correct the streamdata and chop off
# an extra character.
pos = stream.tell()
stream.seek(-10, 1)
end = stream.read(9)
if end == "endstream":
# we found it by looking back one character further.
data["__streamdata__"] = data["__streamdata__"][:-1]
else:
stream.seek(pos, 0)
raise utils.PdfReadError, "Unable to find 'endstream' marker after stream."
else:
stream.seek(pos, 0)
if data.has_key("__streamdata__"):
return StreamObject.initializeFromDictionary(data)
else:
retval = DictionaryObject()
retval.update(data)
return retval
readFromStream = staticmethod(readFromStream)
class StreamObject(DictionaryObject):
def __init__(self):
self._data = None
self.decodedSelf = None
def writeToStream(self, stream, encryption_key):
self[NameObject("/Length")] = NumberObject(len(self._data))
DictionaryObject.writeToStream(self, stream, encryption_key)
del self["/Length"]
stream.write("\nstream\n")
data = self._data
if encryption_key:
data = RC4_encrypt(encryption_key, data)
stream.write(data)
stream.write("\nendstream")
def initializeFromDictionary(data):
if data.has_key("/Filter"):
retval = EncodedStreamObject()
else:
retval = DecodedStreamObject()
retval._data = data["__streamdata__"]
del data["__streamdata__"]
del data["/Length"]
retval.update(data)
return retval
initializeFromDictionary = staticmethod(initializeFromDictionary)
def flateEncode(self):
if self.has_key("/Filter"):
f = self["/Filter"]
if isinstance(f, ArrayObject):
f.insert(0, NameObject("/FlateDecode"))
else:
newf = ArrayObject()
newf.append(NameObject("/FlateDecode"))
newf.append(f)
f = newf
else:
f = NameObject("/FlateDecode")
retval = EncodedStreamObject()
retval[NameObject("/Filter")] = f
retval._data = filters.FlateDecode.encode(self._data)
return retval
class DecodedStreamObject(StreamObject):
def getData(self):
return self._data
def setData(self, data):
self._data = data
class EncodedStreamObject(StreamObject):
def __init__(self):
self.decodedSelf = None
def getData(self):
if self.decodedSelf:
# cached version of decoded object
return self.decodedSelf.getData()
else:
# create decoded object
decoded = DecodedStreamObject()
decoded._data = filters.decodeStreamData(self)
for key, value in self.items():
if not key in ("/Length", "/Filter", "/DecodeParms"):
decoded[key] = value
self.decodedSelf = decoded
return decoded._data
def setData(self, data):
raise utils.PdfReadError, "Creating EncodedStreamObject is not currently supported"
class RectangleObject(ArrayObject):
def __init__(self, arr):
# must have four points
assert len(arr) == 4
# automatically convert arr[x] into NumberObject(arr[x]) if necessary
ArrayObject.__init__(self, [self.ensureIsNumber(x) for x in arr])
def ensureIsNumber(self, value):
if not isinstance(value, (NumberObject, FloatObject)):
value = FloatObject(value)
return value
def __repr__(self):
return "RectangleObject(%s)" % repr(list(self))
def getLowerLeft_x(self):
return self[0]
def getLowerLeft_y(self):
return self[1]
def getUpperRight_x(self):
return self[2]
def getUpperRight_y(self):
return self[3]
def getUpperLeft_x(self):
return self.getLowerLeft_x()
def getUpperLeft_y(self):
return self.getUpperRight_y()
def getLowerRight_x(self):
return self.getUpperRight_x()
def getLowerRight_y(self):
return self.getLowerLeft_y()
def getLowerLeft(self):
return self.getLowerLeft_x(), self.getLowerLeft_y()
def getLowerRight(self):
return self.getLowerRight_x(), self.getLowerRight_y()
def getUpperLeft(self):
return self.getUpperLeft_x(), self.getUpperLeft_y()
def getUpperRight(self):
return self.getUpperRight_x(), self.getUpperRight_y()
def setLowerLeft(self, value):
self[0], self[1] = [self.ensureIsNumber(x) for x in value]
def setLowerRight(self, value):
self[2], self[1] = [self.ensureIsNumber(x) for x in value]
def setUpperLeft(self, value):
self[0], self[3] = [self.ensureIsNumber(x) for x in value]
def setUpperRight(self, value):
self[2], self[3] = [self.ensureIsNumber(x) for x in value]
def getWidth(self):
return self.getUpperRight_x() - self.getLowerLeft_x()
def getHeight(self):
return self.getUpperRight_y() - self.getLowerLeft_x()
lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
lowerRight = property(getLowerRight, setLowerRight, None, None)
upperLeft = property(getUpperLeft, setUpperLeft, None, None)
upperRight = property(getUpperRight, setUpperRight, None, None)
def encode_pdfdocencoding(unicode_string):
retval = ''
for c in unicode_string:
try:
retval += chr(_pdfDocEncoding_rev[c])
except KeyError:
raise UnicodeEncodeError("pdfdocencoding", c, -1, -1,
"does not exist in translation table")
return retval
def decode_pdfdocencoding(byte_array):
retval = u''
for b in byte_array:
c = _pdfDocEncoding[ord(b)]
if c == u'\u0000':
raise UnicodeDecodeError("pdfdocencoding", b, -1, -1,
"does not exist in translation table")
retval += c
return retval
_pdfDocEncoding = (
u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
u'\u02d8', u'\u02c7', u'\u02c6', u'\u02d9', u'\u02dd', u'\u02db', u'\u02da', u'\u02dc',
u'\u0020', u'\u0021', u'\u0022', u'\u0023', u'\u0024', u'\u0025', u'\u0026', u'\u0027',
u'\u0028', u'\u0029', u'\u002a', u'\u002b', u'\u002c', u'\u002d', u'\u002e', u'\u002f',
u'\u0030', u'\u0031', u'\u0032', u'\u0033', u'\u0034', u'\u0035', u'\u0036', u'\u0037',
u'\u0038', u'\u0039', u'\u003a', u'\u003b', u'\u003c', u'\u003d', u'\u003e', u'\u003f',
u'\u0040', u'\u0041', u'\u0042', u'\u0043', u'\u0044', u'\u0045', u'\u0046', u'\u0047',
u'\u0048', u'\u0049', u'\u004a', u'\u004b', u'\u004c', u'\u004d', u'\u004e', u'\u004f',
u'\u0050', u'\u0051', u'\u0052', u'\u0053', u'\u0054', u'\u0055', u'\u0056', u'\u0057',
u'\u0058', u'\u0059', u'\u005a', u'\u005b', u'\u005c', u'\u005d', u'\u005e', u'\u005f',
u'\u0060', u'\u0061', u'\u0062', u'\u0063', u'\u0064', u'\u0065', u'\u0066', u'\u0067',
u'\u0068', u'\u0069', u'\u006a', u'\u006b', u'\u006c', u'\u006d', u'\u006e', u'\u006f',
u'\u0070', u'\u0071', u'\u0072', u'\u0073', u'\u0074', u'\u0075', u'\u0076', u'\u0077',
u'\u0078', u'\u0079', u'\u007a', u'\u007b', u'\u007c', u'\u007d', u'\u007e', u'\u0000',
u'\u2022', u'\u2020', u'\u2021', u'\u2026', u'\u2014', u'\u2013', u'\u0192', u'\u2044',
u'\u2039', u'\u203a', u'\u2212', u'\u2030', u'\u201e', u'\u201c', u'\u201d', u'\u2018',
u'\u2019', u'\u201a', u'\u2122', u'\ufb01', u'\ufb02', u'\u0141', u'\u0152', u'\u0160',
u'\u0178', u'\u017d', u'\u0131', u'\u0142', u'\u0153', u'\u0161', u'\u017e', u'\u0000',
u'\u20ac', u'\u00a1', u'\u00a2', u'\u00a3', u'\u00a4', u'\u00a5', u'\u00a6', u'\u00a7',
u'\u00a8', u'\u00a9', u'\u00aa', u'\u00ab', u'\u00ac', u'\u0000', u'\u00ae', u'\u00af',
u'\u00b0', u'\u00b1', u'\u00b2', u'\u00b3', u'\u00b4', u'\u00b5', u'\u00b6', u'\u00b7',
u'\u00b8', u'\u00b9', u'\u00ba', u'\u00bb', u'\u00bc', u'\u00bd', u'\u00be', u'\u00bf',
u'\u00c0', u'\u00c1', u'\u00c2', u'\u00c3', u'\u00c4', u'\u00c5', u'\u00c6', u'\u00c7',
u'\u00c8', u'\u00c9', u'\u00ca', u'\u00cb', u'\u00cc', u'\u00cd', u'\u00ce', u'\u00cf',
u'\u00d0', u'\u00d1', u'\u00d2', u'\u00d3', u'\u00d4', u'\u00d5', u'\u00d6', u'\u00d7',
u'\u00d8', u'\u00d9', u'\u00da', u'\u00db', u'\u00dc', u'\u00dd', u'\u00de', u'\u00df',
u'\u00e0', u'\u00e1', u'\u00e2', u'\u00e3', u'\u00e4', u'\u00e5', u'\u00e6', u'\u00e7',
u'\u00e8', u'\u00e9', u'\u00ea', u'\u00eb', u'\u00ec', u'\u00ed', u'\u00ee', u'\u00ef',
u'\u00f0', u'\u00f1', u'\u00f2', u'\u00f3', u'\u00f4', u'\u00f5', u'\u00f6', u'\u00f7',
u'\u00f8', u'\u00f9', u'\u00fa', u'\u00fb', u'\u00fc', u'\u00fd', u'\u00fe', u'\u00ff'
)
assert len(_pdfDocEncoding) == 256
_pdfDocEncoding_rev = {}
for i in xrange(256):
char = _pdfDocEncoding[i]
if char == u"\u0000":
continue
assert char not in _pdfDocEncoding_rev
_pdfDocEncoding_rev[char] = i

File diff suppressed because it is too large Load Diff

View File

@ -1,122 +0,0 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Utility functions for PDF library.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
#ENABLE_PSYCO = False
#if ENABLE_PSYCO:
# try:
# import psyco
# except ImportError:
# ENABLE_PSYCO = False
#
#if not ENABLE_PSYCO:
# class psyco:
# def proxy(func):
# return func
# proxy = staticmethod(proxy)
def readUntilWhitespace(stream, maxchars=None):
txt = ""
while True:
tok = stream.read(1)
if tok.isspace() or not tok:
break
txt += tok
if len(txt) == maxchars:
break
return txt
def readNonWhitespace(stream):
tok = ' '
while tok == '\n' or tok == '\r' or tok == ' ' or tok == '\t':
tok = stream.read(1)
return tok
class ConvertFunctionsToVirtualList(object):
def __init__(self, lengthFunction, getFunction):
self.lengthFunction = lengthFunction
self.getFunction = getFunction
def __len__(self):
return self.lengthFunction()
def __getitem__(self, index):
if not isinstance(index, int):
raise TypeError, "sequence indices must be integers"
len_self = len(self)
if index < 0:
# support negative indexes
index += len_self
if index < 0 or index >= len_self:
raise IndexError, "sequence index out of range"
return self.getFunction(index)
def RC4_encrypt(key, plaintext):
S = [i for i in range(256)]
j = 0
for i in range(256):
j = (j + S[i] + ord(key[i % len(key)])) % 256
S[i], S[j] = S[j], S[i]
i, j = 0, 0
retval = ""
for x in range(len(plaintext)):
i = (i + 1) % 256
j = (j + S[i]) % 256
S[i], S[j] = S[j], S[i]
t = S[(S[i] + S[j]) % 256]
retval += chr(ord(plaintext[x]) ^ t)
return retval
def matrixMultiply(a, b):
return [[sum([float(i)*float(j)
for i, j in zip(row, col)]
) for col in zip(*b)]
for row in a]
class PyPdfError(Exception):
pass
class PdfReadError(PyPdfError):
pass
class PageSizeNotDefinedError(PyPdfError):
pass
if __name__ == "__main__":
# test RC4
out = RC4_encrypt("Key", "Plaintext")
print repr(out)
pt = RC4_encrypt("Key", out)
print repr(pt)

View File

@ -1,357 +0,0 @@
import re
import datetime
import decimal
from generic import PdfObject
from xml.dom import getDOMImplementation
from xml.dom.minidom import parseString
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
# What is the PDFX namespace, you might ask? I might ask that too. It's
# a completely undocumented namespace used to place "custom metadata"
# properties, which are arbitrary metadata properties with no semantic or
# documented meaning. Elements in the namespace are key/value-style storage,
# where the element name is the key and the content is the value. The keys
# are transformed into valid XML identifiers by substituting an invalid
# identifier character with \u2182 followed by the unicode hex ID of the
# original character. A key like "my car" is therefore "my\u21820020car".
#
# \u2182, in case you're wondering, is the unicode character
# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
# escaping characters.
#
# Intentional users of the pdfx namespace should be shot on sight. A
# custom data schema and sensical XML elements could be used instead, as is
# suggested by Adobe's own documentation on XMP (under "Extensibility of
# Schemas").
#
# Information presented here on the /pdfx/ schema is a result of limited
# reverse engineering, and does not constitute a full specification.
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
iso8601 = re.compile("""
(?P<year>[0-9]{4})
(-
(?P<month>[0-9]{2})
(-
(?P<day>[0-9]+)
(T
(?P<hour>[0-9]{2}):
(?P<minute>[0-9]{2})
(:(?P<second>[0-9]{2}(.[0-9]+)?))?
(?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
)?
)?
)?
""", re.VERBOSE)
##
# An object that represents Adobe XMP metadata.
class XmpInformation(PdfObject):
def __init__(self, stream):
self.stream = stream
docRoot = parseString(self.stream.getData())
self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
self.cache = {}
def writeToStream(self, stream, encryption_key):
self.stream.writeToStream(stream, encryption_key)
def getElement(self, aboutUri, namespace, name):
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
attr = desc.getAttributeNodeNS(namespace, name)
if attr is not None:
yield attr
for element in desc.getElementsByTagNameNS(namespace, name):
yield element
def getNodesInNamespace(self, aboutUri, namespace):
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
for i in range(desc.attributes.length):
attr = desc.attributes.item(i)
if attr.namespaceURI == namespace:
yield attr
for child in desc.childNodes:
if child.namespaceURI == namespace:
yield child
def _getText(self, element):
text = ""
for child in element.childNodes:
if child.nodeType == child.TEXT_NODE:
text += child.data
return text
def _converter_string(value):
return value
def _converter_date(value):
m = iso8601.match(value)
year = int(m.group("year"))
month = int(m.group("month") or "1")
day = int(m.group("day") or "1")
hour = int(m.group("hour") or "0")
minute = int(m.group("minute") or "0")
second = decimal.Decimal(m.group("second") or "0")
seconds = second.to_integral(decimal.ROUND_FLOOR)
milliseconds = (second - seconds) * 1000000
tzd = m.group("tzd") or "Z"
dt = datetime.datetime(year, month, day, hour, minute, seconds, milliseconds)
if tzd != "Z":
tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
tzd_hours *= -1
if tzd_hours < 0:
tzd_minutes *= -1
dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
return dt
_test_converter_date = staticmethod(_converter_date)
def _getter_bag(namespace, name, converter):
def get(self):
cached = self.cache.get(namespace, {}).get(name)
if cached:
return cached
retval = []
for element in self.getElement("", namespace, name):
bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
if len(bags):
for bag in bags:
for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
value = self._getText(item)
value = converter(value)
retval.append(value)
ns_cache = self.cache.setdefault(namespace, {})
ns_cache[name] = retval
return retval
return get
def _getter_seq(namespace, name, converter):
def get(self):
cached = self.cache.get(namespace, {}).get(name)
if cached:
return cached
retval = []
for element in self.getElement("", namespace, name):
seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
if len(seqs):
for seq in seqs:
for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
value = self._getText(item)
value = converter(value)
retval.append(value)
else:
value = converter(self._getText(element))
retval.append(value)
ns_cache = self.cache.setdefault(namespace, {})
ns_cache[name] = retval
return retval
return get
def _getter_langalt(namespace, name, converter):
def get(self):
cached = self.cache.get(namespace, {}).get(name)
if cached:
return cached
retval = {}
for element in self.getElement("", namespace, name):
alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
if len(alts):
for alt in alts:
for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
value = self._getText(item)
value = converter(value)
retval[item.getAttribute("xml:lang")] = value
else:
retval["x-default"] = converter(self._getText(element))
ns_cache = self.cache.setdefault(namespace, {})
ns_cache[name] = retval
return retval
return get
def _getter_single(namespace, name, converter):
def get(self):
cached = self.cache.get(namespace, {}).get(name)
if cached:
return cached
value = None
for element in self.getElement("", namespace, name):
if element.nodeType == element.ATTRIBUTE_NODE:
value = element.nodeValue
else:
value = self._getText(element)
break
if value is not None:
value = converter(value)
ns_cache = self.cache.setdefault(namespace, {})
ns_cache[name] = value
return value
return get
##
# Contributors to the resource (other than the authors). An unsorted
# array of names.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
##
# Text describing the extent or scope of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
##
# A sorted array of names of the authors of the resource, listed in order
# of precedence.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
##
# A sorted array of dates (datetime.datetime instances) of signifigance to
# the resource. The dates and times are in UTC.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
##
# A language-keyed dictionary of textual descriptions of the content of the
# resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
##
# The mime-type of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
##
# Unique identifier of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
##
# An unordered array specifying the languages used in the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
##
# An unordered array of publisher names.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
##
# An unordered array of text descriptions of relationships to other
# documents.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
##
# A language-keyed dictionary of textual descriptions of the rights the
# user has to this resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
##
# Unique identifier of the work from which this resource was derived.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
##
# An unordered array of descriptive phrases or keywrods that specify the
# topic of the content of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
##
# A language-keyed dictionary of the title of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
##
# An unordered array of textual descriptions of the document type.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
##
# An unformatted text string representing document keywords.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
##
# The PDF file version, for example 1.0, 1.3.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
##
# The name of the tool that created the PDF document.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
##
# The date and time the resource was originally created. The date and
# time are returned as a UTC datetime.datetime object.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
##
# The date and time the resource was last modified. The date and time
# are returned as a UTC datetime.datetime object.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
##
# The date and time that any metadata for this resource was last
# changed. The date and time are returned as a UTC datetime.datetime
# object.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
##
# The name of the first known tool used to create the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
##
# The common identifier for all versions and renditions of this resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
##
# An identifier for a specific incarnation of a document, updated each
# time a file is saved.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
def custom_properties(self):
if not hasattr(self, "_custom_properties"):
self._custom_properties = {}
for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
key = node.localName
while True:
# see documentation about PDFX_NAMESPACE earlier in file
idx = key.find(u"\u2182")
if idx == -1:
break
key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
if node.nodeType == node.ATTRIBUTE_NODE:
value = node.nodeValue
else:
value = self._getText(node)
self._custom_properties[key] = value
return self._custom_properties
##
# Retrieves custom metadata properties defined in the undocumented pdfx
# metadata schema.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
# @return Returns a dictionary of key/value items for custom metadata
# properties.
custom_properties = property(custom_properties)
# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: