pyPdf: upgrade from upstream git: 4abdca42a7d8a4

bzr revid: p_christ@hol.gr-20110106111400-hqw1nu5wx1mict4t
2011-01-06 13:14:00 +02:00 · 2011-01-06 13:14:00 +02:00 · 73af237c8b
parent 660565f56e
commit 73af237c8b
6 changed files with 1136 additions and 767 deletions
--- a/bin/report/pyPdf/init.py
+++ b/bin/report/pyPdf/init.py
@ -1,3 +1,2 @@
-# -*- coding: utf-8 -*-
 from pdf import PdfFileReader, PdfFileWriter
 __all__ = ["pdf"]
--- a/bin/report/pyPdf/filters.py
+++ b/bin/report/pyPdf/filters.py
@ -1,253 +1,252 @@
-# -*- coding: utf-8 -*-
-# vim: sw=4:expandtab:foldmethod=marker
-#
-# Copyright (c) 2006, Mathieu Fenniak
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-# * The name of the author may not be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-
-"""
-Implementation of stream filters for PDF.
-"""
-__author__ = "Mathieu Fenniak"
-__author_email__ = "biziqe@mathieu.fenniak.net"
-
-from utils import PdfReadError
-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
-
-try:
-    import zlib
-    def decompress(data):
-        return zlib.decompress(data)
-    def compress(data):
-        return zlib.compress(data)
-except ImportError:
-    # Unable to import zlib.  Attempt to use the System.IO.Compression
-    # library from the .NET framework. (IronPython only)
-    import System
-    from System import IO, Collections, Array
-    def _string_to_bytearr(buf):
-        retval = Array.CreateInstance(System.Byte, len(buf))
-        for i in range(len(buf)):
-            retval[i] = ord(buf[i])
-        return retval
-    def _bytearr_to_string(bytes):
-        retval = ""
-        for i in range(bytes.Length):
-            retval += chr(bytes[i])
-        return retval
-    def _read_bytes(stream):
-        ms = IO.MemoryStream()
-        buf = Array.CreateInstance(System.Byte, 2048)
-        while True:
-            bytes = stream.Read(buf, 0, buf.Length)
-            if bytes == 0:
-                break
-            else:
-                ms.Write(buf, 0, bytes)
-        retval = ms.ToArray()
-        ms.Close()
-        return retval
-    def decompress(data):
-        bytes = _string_to_bytearr(data)
-        ms = IO.MemoryStream()
-        ms.Write(bytes, 0, bytes.Length)
-        ms.Position = 0  # fseek 0
-        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
-        bytes = _read_bytes(gz)
-        retval = _bytearr_to_string(bytes)
-        gz.Close()
-        return retval
-    def compress(data):
-        bytes = _string_to_bytearr(data)
-        ms = IO.MemoryStream()
-        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
-        gz.Write(bytes, 0, bytes.Length)
-        gz.Close()
-        ms.Position = 0 # fseek 0
-        bytes = ms.ToArray()
-        retval = _bytearr_to_string(bytes)
-        ms.Close()
-        return retval
-
-
-class FlateDecode(object):
-    def decode(data, decodeParms):
-        data = decompress(data)
-        predictor = 1
-        if decodeParms:
-            predictor = decodeParms.get("/Predictor", 1)
-        # predictor 1 == no predictor
-        if predictor != 1:
-            columns = decodeParms["/Columns"]
-            # PNG prediction:
-            if predictor >= 10 and predictor <= 15:
-                output = StringIO()
-                # PNG prediction can vary from row to row
-                rowlength = columns + 1
-                assert len(data) % rowlength == 0
-                prev_rowdata = (0,) * rowlength
-                for row in xrange(len(data) / rowlength):
-                    rowdata = [ord(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
-                    filterByte = rowdata[0]
-                    if filterByte == 0:
-                        pass
-                    elif filterByte == 1:
-                        for i in range(2, rowlength):
-                            rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
-                    elif filterByte == 2:
-                        for i in range(1, rowlength):
-                            rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
-                    else:
-                        # unsupported PNG filter
-                        raise PdfReadError("Unsupported PNG filter %r" % filterByte)
-                    prev_rowdata = rowdata
-                    output.write(''.join([chr(x) for x in rowdata[1:]]))
-                data = output.getvalue()
-            else:
-                # unsupported predictor
-                raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
-        return data
-    decode = staticmethod(decode)
-
-    def encode(data):
-        return compress(data)
-    encode = staticmethod(encode)
-
-class ASCIIHexDecode(object):
-    def decode(data, decodeParms=None):
-        retval = ""
-        char = ""
-        x = 0
-        while True:
-            c = data[x]
-            if c == ">":
-                break
-            elif c.isspace():
-                x += 1
-                continue
-            char += c
-            if len(char) == 2:
-                retval += chr(int(char, base=16))
-                char = ""
-            x += 1
-        assert char == ""
-        return retval
-    decode = staticmethod(decode)
-
-class ASCII85Decode(object):
-    def decode(data, decodeParms=None):
-        retval = ""
-        group = []
-        x = 0
-        hitEod = False
-        # remove all whitespace from data
-        data = [y for y in data if not (y in ' \n\r\t')]
-        while not hitEod:
-            c = data[x]
-            if len(retval) == 0 and c == "<" and data[x+1] == "~":
-                x += 2
-                continue
-            #elif c.isspace():
-            #    x += 1
-            #    continue
-            elif c == 'z':
-                assert len(group) == 0
-                retval += '\x00\x00\x00\x00'
-                continue
-            elif c == "~" and data[x+1] == ">":
-                if len(group) != 0:
-                    # cannot have a final group of just 1 char
-                    assert len(group) > 1
-                    cnt = len(group) - 1
-                    group += [ 85, 85, 85 ]
-                    hitEod = cnt
-                else:
-                    break
-            else:
-                c = ord(c) - 33
-                assert c >= 0 and c < 85
-                group += [ c ]
-            if len(group) >= 5:
-                b = group[0] * (85**4) + \
-                    group[1] * (85**3) + \
-                    group[2] * (85**2) + \
-                    group[3] * 85 + \
-                    group[4]
-                assert b < (2**32 - 1)
-                c4 = chr((b >> 0) % 256)
-                c3 = chr((b >> 8) % 256)
-                c2 = chr((b >> 16) % 256)
-                c1 = chr(b >> 24)
-                retval += (c1 + c2 + c3 + c4)
-                if hitEod:
-                    retval = retval[:-4+hitEod]
-                group = []
-            x += 1
-        return retval
-    decode = staticmethod(decode)
-
-def decodeStreamData(stream):
-    from generic import NameObject
-    filters = stream.get("/Filter", ())
-    if len(filters) and not isinstance(filters[0], NameObject):
-        # we have a single filter instance
-        filters = (filters,)
-    data = stream._data
-    for filterType in filters:
-        if filterType == "/FlateDecode":
-            data = FlateDecode.decode(data, stream.get("/DecodeParms"))
-        elif filterType == "/ASCIIHexDecode":
-            data = ASCIIHexDecode.decode(data)
-        elif filterType == "/ASCII85Decode":
-            data = ASCII85Decode.decode(data)
-        elif filterType == "/Crypt":
-            decodeParams = stream.get("/DecodeParams", {})
-            if "/Name" not in decodeParams and "/Type" not in decodeParams:
-                pass
-            else:
-                raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
-        else:
-            # unsupported filter
-            raise NotImplementedError("unsupported filter %s" % filterType)
-    return data
-
-if __name__ == "__main__":
-    assert "abc" == ASCIIHexDecode.decode('61\n626\n3>')
-
-    ascii85Test = """
-     <~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
-     O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY
-     i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
-     l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
-     >uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>
-    """
-    ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
-    assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText
-
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+"""
+Implementation of stream filters for PDF.
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+from utils import PdfReadError
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+try:
+    import zlib
+    def decompress(data):
+        return zlib.decompress(data)
+    def compress(data):
+        return zlib.compress(data)
+except ImportError:
+    # Unable to import zlib.  Attempt to use the System.IO.Compression
+    # library from the .NET framework. (IronPython only)
+    import System
+    from System import IO, Collections, Array
+    def _string_to_bytearr(buf):
+        retval = Array.CreateInstance(System.Byte, len(buf))
+        for i in range(len(buf)):
+            retval[i] = ord(buf[i])
+        return retval
+    def _bytearr_to_string(bytes):
+        retval = ""
+        for i in range(bytes.Length):
+            retval += chr(bytes[i])
+        return retval
+    def _read_bytes(stream):
+        ms = IO.MemoryStream()
+        buf = Array.CreateInstance(System.Byte, 2048)
+        while True:
+            bytes = stream.Read(buf, 0, buf.Length)
+            if bytes == 0:
+                break
+            else:
+                ms.Write(buf, 0, bytes)
+        retval = ms.ToArray()
+        ms.Close()
+        return retval
+    def decompress(data):
+        bytes = _string_to_bytearr(data)
+        ms = IO.MemoryStream()
+        ms.Write(bytes, 0, bytes.Length)
+        ms.Position = 0  # fseek 0
+        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
+        bytes = _read_bytes(gz)
+        retval = _bytearr_to_string(bytes)
+        gz.Close()
+        return retval
+    def compress(data):
+        bytes = _string_to_bytearr(data)
+        ms = IO.MemoryStream()
+        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
+        gz.Write(bytes, 0, bytes.Length)
+        gz.Close()
+        ms.Position = 0 # fseek 0
+        bytes = ms.ToArray()
+        retval = _bytearr_to_string(bytes)
+        ms.Close()
+        return retval
+
+
+class FlateDecode(object):
+    def decode(data, decodeParms):
+        data = decompress(data)
+        predictor = 1
+        if decodeParms:
+            predictor = decodeParms.get("/Predictor", 1)
+        # predictor 1 == no predictor
+        if predictor != 1:
+            columns = decodeParms["/Columns"]
+            # PNG prediction:
+            if predictor >= 10 and predictor <= 15:
+                output = StringIO()
+                # PNG prediction can vary from row to row
+                rowlength = columns + 1
+                assert len(data) % rowlength == 0
+                prev_rowdata = (0,) * rowlength
+                for row in xrange(len(data) / rowlength):
+                    rowdata = [ord(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
+                    filterByte = rowdata[0]
+                    if filterByte == 0:
+                        pass
+                    elif filterByte == 1:
+                        for i in range(2, rowlength):
+                            rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
+                    elif filterByte == 2:
+                        for i in range(1, rowlength):
+                            rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
+                    else:
+                        # unsupported PNG filter
+                        raise PdfReadError("Unsupported PNG filter %r" % filterByte)
+                    prev_rowdata = rowdata
+                    output.write(''.join([chr(x) for x in rowdata[1:]]))
+                data = output.getvalue()
+            else:
+                # unsupported predictor
+                raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
+        return data
+    decode = staticmethod(decode)
+
+    def encode(data):
+        return compress(data)
+    encode = staticmethod(encode)
+
+class ASCIIHexDecode(object):
+    def decode(data, decodeParms=None):
+        retval = ""
+        char = ""
+        x = 0
+        while True:
+            c = data[x]
+            if c == ">":
+                break
+            elif c.isspace():
+                x += 1
+                continue
+            char += c
+            if len(char) == 2:
+                retval += chr(int(char, base=16))
+                char = ""
+            x += 1
+        assert char == ""
+        return retval
+    decode = staticmethod(decode)
+
+class ASCII85Decode(object):
+    def decode(data, decodeParms=None):
+        retval = ""
+        group = []
+        x = 0
+        hitEod = False
+        # remove all whitespace from data
+        data = [y for y in data if not (y in ' \n\r\t')]
+        while not hitEod:
+            c = data[x]
+            if len(retval) == 0 and c == "<" and data[x+1] == "~":
+                x += 2
+                continue
+            #elif c.isspace():
+            #    x += 1
+            #    continue
+            elif c == 'z':
+                assert len(group) == 0
+                retval += '\x00\x00\x00\x00'
+                # no `continue` here: fall through to the shared `x += 1` so 'z' is consumed
+            elif c == "~" and data[x+1] == ">":
+                if len(group) != 0:
+                    # cannot have a final group of just 1 char
+                    assert len(group) > 1
+                    cnt = len(group) - 1
+                    group += [ 85, 85, 85 ]
+                    hitEod = cnt
+                else:
+                    break
+            else:
+                c = ord(c) - 33
+                assert c >= 0 and c < 85
+                group += [ c ]
+            if len(group) >= 5:
+                b = group[0] * (85**4) + \
+                    group[1] * (85**3) + \
+                    group[2] * (85**2) + \
+                    group[3] * 85 + \
+                    group[4]
+                assert b <= (2**32 - 1)
+                c4 = chr((b >> 0) % 256)
+                c3 = chr((b >> 8) % 256)
+                c2 = chr((b >> 16) % 256)
+                c1 = chr(b >> 24)
+                retval += (c1 + c2 + c3 + c4)
+                if hitEod:
+                    retval = retval[:-4+hitEod]
+                group = []
+            x += 1
+        return retval
+    decode = staticmethod(decode)
+
+def decodeStreamData(stream):
+    from generic import NameObject
+    filters = stream.get("/Filter", ())
+    if len(filters) and not isinstance(filters[0], NameObject):
+        # we have a single filter instance
+        filters = (filters,)
+    data = stream._data
+    for filterType in filters:
+        if filterType == "/FlateDecode":
+            data = FlateDecode.decode(data, stream.get("/DecodeParms"))
+        elif filterType == "/ASCIIHexDecode":
+            data = ASCIIHexDecode.decode(data)
+        elif filterType == "/ASCII85Decode":
+            data = ASCII85Decode.decode(data)
+        elif filterType == "/Crypt":
+            decodeParams = stream.get("/DecodeParams", {})
+            if "/Name" not in decodeParams and "/Type" not in decodeParams:
+                pass
+            else:
+                raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
+        else:
+            # unsupported filter
+            raise NotImplementedError("unsupported filter %s" % filterType)
+    return data
+
+if __name__ == "__main__":
+    assert "abc" == ASCIIHexDecode.decode('61\n626\n3>')
+
+    ascii85Test = """
+     <~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
+     O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY
+     i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
+     l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
+     >uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>
+    """
+    ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
+    assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText
+
--- a/bin/report/pyPdf/generic.py
+++ b/bin/report/pyPdf/generic.py
@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # vim: sw=4:expandtab:foldmethod=marker
 #
 # Copyright (c) 2006, Mathieu Fenniak
@ -207,15 +206,18 @@ class FloatObject(decimal.Decimal, PdfObject):
    def __new__(cls, value="0", context=None):
        return decimal.Decimal.__new__(cls, str(value), context)
    def __repr__(self):
-        return str(self)
+        if self == self.to_integral():
+            return str(self.quantize(decimal.Decimal(1)))
+        else:
+            # XXX: this adds useless extraneous zeros.
+            return "%.5f" % self
    def writeToStream(self, stream, encryption_key):
-        stream.write(str(self))
+        stream.write(repr(self))


 class NumberObject(int, PdfObject):
    def __init__(self, value):
-        int.__init__(self)
-        self = value
+        int.__init__(value)

    def writeToStream(self, stream, encryption_key):
        stream.write(repr(self))
@ -301,7 +303,7 @@ def readStringFromStream(stream):
            elif tok == "t":
                tok = "\t"
            elif tok == "b":
-                tok == "\b"
+                tok = "\b"
            elif tok == "f":
                tok = "\f"
            elif tok == "(":
@ -311,7 +313,17 @@ def readStringFromStream(stream):
            elif tok == "\\":
                tok = "\\"
            elif tok.isdigit():
-                tok += stream.read(2)
+                # "The number ddd may consist of one, two, or three
+                # octal digits; high-order overflow shall be ignored.
+                # Three octal digits shall be used, with leading zeros
+                # as needed, if the next character of the string is also
+                # a digit." (PDF reference 7.3.4.2, p 16)
+                for i in range(2):
+                    ntok = stream.read(1)
+                    if ntok.isdigit():
+                        tok += ntok
+                    else:
+                        break
                tok = chr(int(tok, base=8))
            elif tok in "\n\r":
                # This case is  hit when a backslash followed by a line
@ -405,8 +417,7 @@ class NameObject(str, PdfObject):
    delimiterCharacters = "(", ")", "<", ">", "[", "]", "{", "}", "/", "%"

    def __init__(self, data):
-        str.__init__(self)
-        self = data
+        str.__init__(data)

    def writeToStream(self, stream, encryption_key):
        stream.write(self)
@ -710,6 +721,12 @@ class RectangleObject(ArrayObject):
    def setUpperRight(self, value):
        self[2], self[3] = [self.ensureIsNumber(x) for x in value]

+    def getWidth(self):
+        return self.getUpperRight_x() - self.getLowerLeft_x()
+
+    def getHeight(self):
+        return self.getUpperRight_y() - self.getLowerLeft_y()
+
    lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
    lowerRight = property(getLowerRight, setLowerRight, None, None)
    upperLeft = property(getUpperLeft, setUpperLeft, None, None)
--- a/bin/report/pyPdf/pdf.py
+++ b/bin/report/pyPdf/pdf.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+#
 # vim: sw=4:expandtab:foldmethod=marker
 #
 # Copyright (c) 2006, Mathieu Fenniak
@ -39,7 +40,9 @@ It may be a solid base for future PDF file work in Python.
 __author__ = "Mathieu Fenniak"
 __author_email__ = "biziqe@mathieu.fenniak.net"

+import math
 import struct
+from sys import version_info
 try:
    from cStringIO import StringIO
 except ImportError:
@ -51,6 +54,14 @@ import warnings
 from generic import *
 from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList

+if version_info < ( 2, 4 ):
+   from sets import ImmutableSet as frozenset
+
+if version_info < ( 2, 5 ):
+    from md5 import md5
+else:
+    from hashlib import md5
+
 ##
 # This class supports writing PDF files out, given pages produced by another
 # class (typically {@link #PdfFileReader PdfFileReader}).
@ -92,6 +103,21 @@ class PdfFileWriter(object):
            raise ValueError("pdf must be self")
        return self._objects[ido.idnum - 1]

+    ##
+    # Common method for inserting or adding a page to this PDF file.
+    #
+    # @param page The page to add to the document.  This argument should be
+    #             an instance of {@link #PageObject PageObject}.
+    # @param action The function which will insert the page in the dictionary.
+    #               Takes: page list, page to add.
+    def _addPage(self, page, action):
+        assert page["/Type"] == "/Page"
+        page[NameObject("/Parent")] = self._pages
+        page = self._addObject(page)
+        pages = self.getObject(self._pages)
+        action(pages["/Kids"], page)
+        pages[NameObject("/Count")] = NumberObject(pages["/Count"] + 1)
+
    ##
    # Adds a page to this PDF file.  The page is usually acquired from a
    # {@link #PdfFileReader PdfFileReader} instance.
@ -101,12 +127,64 @@ class PdfFileWriter(object):
    # @param page The page to add to the document.  This argument should be
    #             an instance of {@link #PageObject PageObject}.
    def addPage(self, page):
-        assert page["/Type"] == "/Page"
-        page[NameObject("/Parent")] = self._pages
-        page = self._addObject(page)
+        self._addPage(page, list.append)
+
+    ##
+    # Insert a page in this PDF file.  The page is usually acquired from a
+    # {@link #PdfFileReader PdfFileReader} instance.
+    #
+    # @param page The page to add to the document.  This argument should be
+    #             an instance of {@link #PageObject PageObject}.
+    # @param index Position at which the page will be inserted.
+    def insertPage(self, page, index=0):
+        self._addPage(page, lambda l, p: l.insert(index, p))
+
+    ##
+    # Retrieves a page by number from this PDF file.
+    # @return Returns a {@link #PageObject PageObject} instance.
+    def getPage(self, pageNumber):
        pages = self.getObject(self._pages)
-        pages["/Kids"].append(page)
-        pages[NameObject("/Count")] = NumberObject(pages["/Count"] + 1)
+        # XXX: crude hack
+        return pages["/Kids"][pageNumber].getObject()
+
+    ##
+    # Return the number of pages.
+    # @return The number of pages.
+    def getNumPages(self):
+        pages = self.getObject(self._pages)
+        return int(pages[NameObject("/Count")])
+
+    ##
+    # Append a blank page to this PDF file and returns it. If no page size
+    # is specified, use the size of the last page; throw
+    # PageSizeNotDefinedError if it doesn't exist.
+    # @param width The width of the new page expressed in default user
+    # space units.
+    # @param height The height of the new page expressed in default user
+    # space units.
+    def addBlankPage(self, width=None, height=None):
+        page = PageObject.createBlankPage(self, width, height)
+        self.addPage(page)
+        return page
+
+    ##
+    # Insert a blank page to this PDF file and returns it. If no page size
+    # is specified, use the size of the page in the given index; throw
+    # PageSizeNotDefinedError if it doesn't exist.
+    # @param width  The width of the new page expressed in default user
+    #               space units.
+    # @param height The height of the new page expressed in default user
+    #               space units.
+    # @param index  Position to add the page.
+    def insertBlankPage(self, width=None, height=None, index=0):
+        if (width is None or height is None) and \
+                (self.getNumPages() - 1) >= index:
+            oldpage = self.getPage(index)
+            width = oldpage.mediaBox.getWidth()
+            height = oldpage.mediaBox.getHeight()
+        page = PageObject.createBlankPage(self, width, height)
+        self.insertPage(page, index)
+        return page

    ##
    # Encrypt this PDF file with the PDF Standard encryption handler.
@ -119,7 +197,7 @@ class PdfFileWriter(object):
    # encryption.  When false, 40bit encryption will be used.  By default, this
    # flag is on.
    def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
-        import md5, time, random
+        import time, random
        if owner_pwd == None:
            owner_pwd = user_pwd
        if use_128bit:
@ -133,8 +211,8 @@ class PdfFileWriter(object):
        # permit everything:
        P = -1
        O = ByteStringObject(_alg33(owner_pwd, user_pwd, rev, keylen))
-        ID_1 = md5.new(repr(time.time())).digest()
-        ID_2 = md5.new(repr(random.random())).digest()
+        ID_1 = md5(repr(time.time())).digest()
+        ID_2 = md5(repr(random.random())).digest()
        self._ID = ArrayObject((ByteStringObject(ID_1), ByteStringObject(ID_2)))
        if rev == 2:
            U, key = _alg34(user_pwd, O, P, ID_1)
@ -160,9 +238,28 @@ class PdfFileWriter(object):
    # @param stream An object to write the file to.  The object must support
    # the write method, and the tell method, similar to a file object.
    def write(self, stream):
-        import struct, md5
+        import struct

        externalReferenceMap = {}
+
+        # PDF objects sometimes have circular references to their /Page objects
+        # inside their object tree (for example, annotations).  Those will be
+        # indirect references to objects that we've recreated in this PDF.  To
+        # address this problem, PageObject's store their original object
+        # reference number, and we add it to the external reference map before
+        # we sweep for indirect references.  This forces self-page-referencing
+        # trees to reference the correct new object location, rather than
+        # copying in a new copy of the page object.
+        for objIndex in xrange(len(self._objects)):
+            obj = self._objects[objIndex]
+            if isinstance(obj, PageObject) and obj.indirectRef != None:
+                data = obj.indirectRef
+                if not externalReferenceMap.has_key(data.pdf):
+                    externalReferenceMap[data.pdf] = {}
+                if not externalReferenceMap[data.pdf].has_key(data.generation):
+                    externalReferenceMap[data.pdf][data.generation] = {}
+                externalReferenceMap[data.pdf][data.generation][data.idnum] = IndirectObject(objIndex + 1, 0, self)
+
        self.stack = []
        self._sweepIndirectReferences(externalReferenceMap, self._root)
        del self.stack
@ -181,7 +278,7 @@ class PdfFileWriter(object):
                pack2 = struct.pack("<i", 0)[:2]
                key = self._encrypt_key + pack1 + pack2
                assert len(key) == (len(self._encrypt_key) + 5)
-                md5_hash = md5.new(key).digest()
+                md5_hash = md5(key).digest()
                key = md5_hash[:min(16, len(self._encrypt_key) + 5)]
            obj.writeToStream(stream, key)
            stream.write("\nendobj\n")
@ -487,7 +584,7 @@ class PdfFileReader(object):
    pages = property(lambda self: ConvertFunctionsToVirtualList(self.getNumPages, self.getPage),
            None, None)

-    def _flatten(self, pages=None, inherit=None):
+    def _flatten(self, pages=None, inherit=None, indirectRef=None):
        inheritablePageAttributes = (
            NameObject("/Resources"), NameObject("/MediaBox"),
            NameObject("/CropBox"), NameObject("/Rotate")
@ -504,14 +601,17 @@ class PdfFileReader(object):
                if pages.has_key(attr):
                    inherit[attr] = pages[attr]
            for page in pages["/Kids"]:
-                self._flatten(page.getObject(), inherit)
+                addt = {}
+                if isinstance(page, IndirectObject):
+                    addt["indirectRef"] = page
+                self._flatten(page.getObject(), inherit, **addt)
        elif t == "/Page":
            for attr,value in inherit.items():
                # if the page has it's own value, it does not inherit the
                # parent's value:
                if not pages.has_key(attr):
                    pages[attr] = value
-            pageObj = PageObject(self)
+            pageObj = PageObject(self, indirectRef)
            pageObj.update(pages)
            self.flattenedPages.append(pageObj)

@ -554,12 +654,12 @@ class PdfFileReader(object):
            if not hasattr(self, '_decryption_key'):
                raise Exception, "file has not been decrypted"
            # otherwise, decrypt here...
-            import struct, md5
+            import struct
            pack1 = struct.pack("<i", indirectReference.idnum)[:3]
            pack2 = struct.pack("<i", indirectReference.generation)[:2]
            key = self._decryption_key + pack1 + pack2
            assert len(key) == (len(self._decryption_key) + 5)
-            md5_hash = md5.new(key).digest()
+            md5_hash = md5(key).digest()
            key = md5_hash[:min(16, len(self._decryption_key) + 5)]
            retval = self._decryptObject(retval, key)

@ -890,11 +990,46 @@ def createRectangleAccessor(name, fallback):
 ##
 # This class represents a single page within a PDF file.  Typically this object
 # will be created by accessing the {@link #PdfFileReader.getPage getPage}
-# function of the {@link #PdfFileReader PdfFileReader} class.
+# function of the {@link #PdfFileReader PdfFileReader} class, but it is
+# also possible to create an empty page with the createBlankPage static
+# method.
+# @param pdf PDF file the page belongs to (optional, defaults to None).
 class PageObject(DictionaryObject):
-    def __init__(self, pdf):
+    def __init__(self, pdf=None, indirectRef=None):
         DictionaryObject.__init__(self)
+        # PDF file the page belongs to; None when no file was given.
         self.pdf = pdf
+        # Stores the original indirect reference to this object in its source
+        # PDF; stays None when the caller did not supply one.
+        self.indirectRef = indirectRef
+
+    ##
+    # Returns a new blank page.
+    # If width or height is None, try to get the page size from the
+    # last page of pdf. If pdf is None or contains no page, a
+    # PageSizeNotDefinedError is raised.
+    # @param pdf    PDF file the page belongs to
+    # @param width  The width of the new page expressed in default user
+    #               space units.
+    # @param height The height of the new page expressed in default user
+    #               space units.
+    def createBlankPage(pdf=None, width=None, height=None):
+        # Start from an empty page dictionary bound to pdf (which may be None).
+        blank = PageObject(pdf)
+
+        # Basic page entries (cf PDF Reference  7.7.3.3)
+        blank[NameObject('/Type')] = NameObject('/Page')
+        blank[NameObject('/Parent')] = NullObject()
+        blank[NameObject('/Resources')] = DictionaryObject()
+
+        # A missing dimension means both dimensions are borrowed from the last
+        # page of pdf; without such a page there is nothing to fall back on.
+        if width is None or height is None:
+            if pdf is None or not (pdf.getNumPages() > 0):
+                raise utils.PageSizeNotDefinedError()
+            template = pdf.getPage(pdf.getNumPages() - 1)
+            width = template.mediaBox.getWidth()
+            height = template.mediaBox.getHeight()
+
+        blank[NameObject('/MediaBox')] = RectangleObject([0, 0, width, height])
+        return blank
+    createBlankPage = staticmethod(createBlankPage)

    ##
    # Rotates a page clockwise by increments of 90 degrees.
@ -931,7 +1066,7 @@ class PageObject(DictionaryObject):
                renameRes[key] = newname
                newRes[newname] = page2Res[key]
            elif not newRes.has_key(key):
-                newRes[key] = page2Res[key]
+                newRes[key] = page2Res.raw_get(key)
        return newRes, renameRes
    _mergeResources = staticmethod(_mergeResources)

@ -957,6 +1092,26 @@ class PageObject(DictionaryObject):
        return stream
    _pushPopGS = staticmethod(_pushPopGS)

+    def _addTransformationMatrix(contents, pdf, ctm):
+        # Wraps the given contents in a ContentStream and puts a "cm"
+        # operation carrying the six ctm operands in front of its operations.
+        a, b, c, d, e, f = ctm
+        stream = ContentStream(contents, pdf)
+        operands = [FloatObject(value) for value in (a, b, c, d, e, f)]
+        stream.operations.insert(0, [operands, " cm"])
+        return stream
+    _addTransformationMatrix = staticmethod(_addTransformationMatrix)
+
+    ##
+    # Returns the /Contents object, or None if it doesn't exist.
+    # /Contents is optional, as described in PDF Reference  7.7.3.3
+    def getContents(self):
+        # A page without a /Contents entry has no content object to resolve.
+        if not self.has_key("/Contents"):
+            return None
+        return self["/Contents"].getObject()
+
    ##
    # Merges the content streams of two pages into one.  Resource references
    # (i.e. fonts) are maintained from both pages.  The mediabox/cropbox/etc
@ -968,7 +1123,23 @@ class PageObject(DictionaryObject):
    # @param page2 An instance of {@link #PageObject PageObject} to be merged
    #              into this one.
    def mergePage(self, page2):
+        # Plain merge: delegate to _mergePage without passing a
+        # page2transformation callback.
+        self._mergePage(page2)

+    ##
+    # Actually merges the content streams of two pages into one. Resource
+    # references (i.e. fonts) are maintained from both pages. The
+    # mediabox/cropbox/etc of this page are not altered. The parameter page's
+    # content stream will be added to the end of this page's content stream,
+    # meaning that it will be drawn after, or "on top" of this page.
+    #
+    # @param page2 An instance of {@link #PageObject PageObject} to be merged
+    #              into this one.
+    # @param page2transformation A function which applies a transformation to
+    #                            the content stream of page2. Takes: page2
+    #                            contents stream. Must return: new contents
+    #                            stream. If omitted, the content stream will
+    #                            not be modified.
+    def _mergePage(self, page2, page2transformation=None):
        # First we work on merging the resource dictionaries.  This allows us
        # to find out what symbols in the content streams we might need to
        # rename.
@ -978,7 +1149,7 @@ class PageObject(DictionaryObject):
        originalResources = self["/Resources"].getObject()
        page2Resources = page2["/Resources"].getObject()

-        for res in "/ExtGState", "/Font", "/XObject", "/ColorSpace", "/Pattern", "/Shading":
+        for res in "/ExtGState", "/Font", "/XObject", "/ColorSpace", "/Pattern", "/Shading", "/Properties":
            new, newrename = PageObject._mergeResources(originalResources, page2Resources, res)
            if new:
                newResources[NameObject(res)] = new
@ -993,17 +1164,191 @@ class PageObject(DictionaryObject):

        newContentArray = ArrayObject()

-        originalContent = self["/Contents"].getObject()
-        newContentArray.append(PageObject._pushPopGS(originalContent, self.pdf))
+        originalContent = self.getContents()
+        if originalContent is not None:
+            newContentArray.append(PageObject._pushPopGS(
+                  originalContent, self.pdf))

-        page2Content = page2['/Contents'].getObject()
-        page2Content = PageObject._contentStreamRename(page2Content, rename, self.pdf)
-        page2Content = PageObject._pushPopGS(page2Content, self.pdf)
-        newContentArray.append(page2Content)
+        page2Content = page2.getContents()
+        if page2Content is not None:
+            if page2transformation is not None:
+                page2Content = page2transformation(page2Content)
+            page2Content = PageObject._contentStreamRename(
+                page2Content, rename, self.pdf)
+            page2Content = PageObject._pushPopGS(page2Content, self.pdf)
+            newContentArray.append(page2Content)

        self[NameObject('/Contents')] = ContentStream(newContentArray, self.pdf)
        self[NameObject('/Resources')] = newResources

+    ##
+    # This is similar to mergePage, but a transformation matrix is
+    # applied to the merged stream.
+    #
+    # @param page2 An instance of {@link #PageObject PageObject} to be merged.
+    # @param ctm   A 6 elements tuple containing the operands of the
+    #              transformation matrix
+    def mergeTransformedPage(self, page2, ctm):
+        # Callback handed to _mergePage: puts the "cm" operation built from
+        # ctm in front of page2's content stream.
+        def applyCtm(content):
+            return PageObject._addTransformationMatrix(content, page2.pdf, ctm)
+        self._mergePage(page2, applyCtm)
+
+    ##
+    # This is similar to mergePage, but the stream to be merged is scaled
+    # by applying a transformation matrix.
+    #
+    # @param page2 An instance of {@link #PageObject PageObject} to be merged.
+    # @param factor The scaling factor
+    def mergeScaledPage(self, page2, factor):
+        # CTM to scale : [ sx 0 0 sy 0 0 ], with sx and sy both set to factor
+        scalingCtm = [factor, 0, 0, factor, 0, 0]
+        return self.mergeTransformedPage(page2, scalingCtm)
+
+    ##
+    # This is similar to mergePage, but the stream to be merged is rotated
+    # by applying a transformation matrix.
+    #
+    # @param page2 An instance of {@link #PageObject PageObject} to be merged.
+    # @param rotation The angle of the rotation, in degrees
+    def mergeRotatedPage(self, page2, rotation):
+        # The angle arrives in degrees; math.cos/math.sin expect radians.
+        angle = math.radians(rotation)
+        cosine = math.cos(angle)
+        sine = math.sin(angle)
+        # CTM to rotate : [ cos sin -sin cos 0 0 ]
+        rotationCtm = [cosine, sine,
+                       -sine,  cosine,
+                       0,      0]
+        return self.mergeTransformedPage(page2, rotationCtm)
+
+    ##
+    # This is similar to mergePage, but the stream to be merged is translated
+    # by applying a transformation matrix.
+    #
+    # @param page2 An instance of {@link #PageObject PageObject} to be merged.
+    # @param tx    The translation on X axis
+    # @param ty    The translation on Y axis
+    def mergeTranslatedPage(self, page2, tx, ty):
+        # CTM to translate : [ 1 0 0 1 tx ty ]
+        translationCtm = [1, 0, 0, 1, tx, ty]
+        return self.mergeTransformedPage(page2, translationCtm)
+
+    ##
+    # This is similar to mergePage, but the stream to be merged is rotated
+    # and scaled by applying a transformation matrix.
+    #
+    # @param page2 An instance of {@link #PageObject PageObject} to be merged.
+    # @param rotation The angle of the rotation, in degrees
+    # @param scale The scaling factor
+    def mergeRotatedScaledPage(self, page2, rotation, scale):
+        angle = math.radians(rotation)
+        cosine = math.cos(angle)
+        sine = math.sin(angle)
+        # 3x3 forms of the rotation and the uniform scaling.
+        rotating = [[cosine, sine,   0],
+                    [-sine,  cosine, 0],
+                    [0,      0,      1]]
+        scaling = [[scale, 0,     0],
+                   [0,     scale, 0],
+                   [0,     0,     1]]
+        product = utils.matrixMultiply(rotating, scaling)
+
+        # The six operands are the first two entries of each of the three
+        # rows of the product.
+        operands = [product[0][0], product[0][1]]
+        operands += [product[1][0], product[1][1]]
+        operands += [product[2][0], product[2][1]]
+        return self.mergeTransformedPage(page2, operands)
+
+    ##
+    # This is similar to mergePage, but the stream to be merged is translated
+    # and scaled by applying a transformation matrix.
+    #
+    # @param page2 An instance of {@link #PageObject PageObject} to be merged.
+    # @param scale The scaling factor
+    # @param tx    The translation on X axis
+    # @param ty    The translation on Y axis
+    def mergeScaledTranslatedPage(self, page2, scale, tx, ty):
+        # 3x3 forms of the uniform scaling and the translation.
+        scaling = [[scale, 0,     0],
+                   [0,     scale, 0],
+                   [0,     0,     1]]
+        translation = [[1,  0,  0],
+                       [0,  1,  0],
+                       [tx, ty, 1]]
+        product = utils.matrixMultiply(scaling, translation)
+
+        # The six operands are the first two entries of each of the three
+        # rows of the product.
+        operands = [product[0][0], product[0][1]]
+        operands += [product[1][0], product[1][1]]
+        operands += [product[2][0], product[2][1]]
+        return self.mergeTransformedPage(page2, operands)
+
+    ##
+    # This is similar to mergePage, but the stream to be merged is translated,
+    # rotated and scaled by applying a transformation matrix.
+    #
+    # @param page2 An instance of {@link #PageObject PageObject} to be merged.
+    # @param tx    The translation on X axis
+    # @param ty    The translation on Y axis
+    # @param rotation The angle of the rotation, in degrees
+    # @param scale The scaling factor
+    def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty):
+        angle = math.radians(rotation)
+        cosine = math.cos(angle)
+        sine = math.sin(angle)
+        # 3x3 forms of the three elementary transformations.
+        rotating = [[cosine, sine,   0],
+                    [-sine,  cosine, 0],
+                    [0,      0,      1]]
+        scaling = [[scale, 0,     0],
+                   [0,     scale, 0],
+                   [0,     0,     1]]
+        translation = [[1,  0,  0],
+                       [0,  1,  0],
+                       [tx, ty, 1]]
+        # Multiplied in the order (rotating x scaling) x translation.
+        product = utils.matrixMultiply(
+            utils.matrixMultiply(rotating, scaling), translation)
+
+        # The six operands are the first two entries of each of the three
+        # rows of the product.
+        operands = [product[0][0], product[0][1]]
+        operands += [product[1][0], product[1][1]]
+        operands += [product[2][0], product[2][1]]
+        return self.mergeTransformedPage(page2, operands)
+
+    ##
+    # Applies a transformation matrix to the page.
+    #
+    # @param ctm   A 6 elements tuple containing the operands of the
+    #              transformation matrix
+    def addTransformation(self, ctm):
+        current = self.getContents()
+        if current is None:
+            # No /Contents on this page: there is nothing to transform.
+            return
+        transformed = PageObject._addTransformationMatrix(
+            current, self.pdf, ctm)
+        self[NameObject('/Contents')] = PageObject._pushPopGS(
+            transformed, self.pdf)
+
+    ##
+    # Scales a page by the given factors by applying a transformation
+    # matrix to its content and updating the page size.
+    #
+    # @param sx The scaling factor on horizontal axis
+    # @param sy The scaling factor on vertical axis
+    def scale(self, sx, sy):
+        # First scale the content stream: CTM [ sx 0 0 sy 0 0 ]
+        self.addTransformation([sx, 0, 0, sy, 0, 0])
+        # Then multiply the media box corners by the same factors.
+        lowerLeftX = float(self.mediaBox.getLowerLeft_x()) * sx
+        lowerLeftY = float(self.mediaBox.getLowerLeft_y()) * sy
+        upperRightX = float(self.mediaBox.getUpperRight_x()) * sx
+        upperRightY = float(self.mediaBox.getUpperRight_y()) * sy
+        self.mediaBox = RectangleObject(
+            [lowerLeftX, lowerLeftY, upperRightX, upperRightY])
+
+    ##
+    # Scales a page by the given factor by applying a transformation
+    # matrix to its content and updating the page size.
+    #
+    # @param factor The scaling factor
+    def scaleBy(self, factor):
+        # Uniform scaling: the same factor is used on both axes.
+        self.scale(factor, factor)
+
+    ##
+    # Scales a page to the specified dimensions by applying a
+    # transformation matrix to its content and updating the page size.
+    #
+    # @param width The new width
+    # @param height The new height
+    def scaleTo(self, width, height):
+        # Current page extents, taken from the media box corners.  They are
+        # converted with float(), as scale() does for each coordinate, so
+        # the divisions below are always true divisions.
+        currentWidth = float(self.mediaBox.getUpperRight_x() -
+                             self.mediaBox.getLowerLeft_x())
+        # The vertical extent must be measured from the lower-left *y*
+        # coordinate; subtracting the x coordinate gives a wrong factor
+        # whenever the media box origin has x != y.
+        currentHeight = float(self.mediaBox.getUpperRight_y() -
+                              self.mediaBox.getLowerLeft_y())
+        # scale() transforms the content and updates the media box.
+        self.scale(width / currentWidth, height / currentHeight)
+
    ##
    # Compresses the size of this page by joining all content streams and
    # applying a FlateDecode filter.
@ -1012,10 +1357,11 @@ class PageObject(DictionaryObject):
    # However, it is possible that this function will perform no action if
    # content stream compression becomes "automatic" for some reason.
    def compressContentStreams(self):
-        content = self["/Contents"].getObject()
-        if not isinstance(content, ContentStream):
-            content = ContentStream(content, self.pdf)
-        self[NameObject("/Contents")] = content.flateEncode()
+        content = self.getContents()
+        # A page without a /Contents entry is left untouched.
+        if content is not None:
+            # Wrap the contents in a ContentStream (unless it already is
+            # one) before calling flateEncode() on it.
+            if not isinstance(content, ContentStream):
+                content = ContentStream(content, self.pdf)
+            self[NameObject("/Contents")] = content.flateEncode()

    ##
    # Locate all text drawing commands, in the order they are provided in the
@ -1369,8 +1715,8 @@ def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
    password = (password + _encryption_padding)[:32]
    # 2. Initialize the MD5 hash function and pass the result of step 1 as
    # input to this function.
-    import md5, struct
-    m = md5.new(password)
+    import struct
+    m = md5(password)
    # 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash
    # function.
    m.update(owner_entry)
@ -1394,7 +1740,7 @@ def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
    # /Length entry.
    if rev >= 3:
        for i in range(50):
-            md5_hash = md5.new(md5_hash[:keylen]).digest()
+            md5_hash = md5(md5_hash[:keylen]).digest()
    # 9. Set the encryption key to the first n bytes of the output from the
    # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or
    # greater, depends on the value of the encryption dictionary's /Length
@ -1436,14 +1782,13 @@ def _alg33_1(password, rev, keylen):
    password = (password + _encryption_padding)[:32]
    # 2. Initialize the MD5 hash function and pass the result of step 1 as
    # input to this function.
-    import md5
-    m = md5.new(password)
+    m = md5(password)
    # 3. (Revision 3 or greater) Do the following 50 times: Take the output
    # from the previous MD5 hash and pass it as input into a new MD5 hash.
    md5_hash = m.digest()
    if rev >= 3:
        for i in range(50):
-            md5_hash = md5.new(md5_hash).digest()
+            md5_hash = md5(md5_hash).digest()
    # 4. Create an RC4 encryption key using the first n bytes of the output
    # from the final MD5 hash, where n is always 5 for revision 2 but, for
    # revision 3 or greater, depends on the value of the encryption
@ -1473,8 +1818,7 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
    # 2. Initialize the MD5 hash function and pass the 32-byte padding string
    # shown in step 1 of Algorithm 3.2 as input to this function. 
-    import md5
-    m = md5.new()
+    m = md5()
    m.update(_encryption_padding)
    # 3. Pass the first element of the file's file identifier array (the value
    # of the ID entry in the document's trailer dictionary; see Table 3.13 on
--- a/bin/report/pyPdf/utils.py
+++ b/bin/report/pyPdf/utils.py
@ -1,111 +1,122 @@
-# -*- coding: utf-8 -*-
-# vim: sw=4:expandtab:foldmethod=marker
-#
-# Copyright (c) 2006, Mathieu Fenniak
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-# * The name of the author may not be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-
-"""
-Utility functions for PDF library.
-"""
-__author__ = "Mathieu Fenniak"
-__author_email__ = "biziqe@mathieu.fenniak.net"
-
-#ENABLE_PSYCO = False
-#if ENABLE_PSYCO:
-#    try:
-#        import psyco
-#    except ImportError:
-#        ENABLE_PSYCO = False
-#
-#if not ENABLE_PSYCO:
-#    class psyco:
-#        def proxy(func):
-#            return func
-#        proxy = staticmethod(proxy)
-
-def readUntilWhitespace(stream, maxchars=None):
-    txt = ""
-    while True:
-        tok = stream.read(1)
-        if tok.isspace() or not tok:
-            break
-        txt += tok
-        if len(txt) == maxchars:
-            break
-    return txt
-
-def readNonWhitespace(stream):
-    tok = ' '
-    while tok == '\n' or tok == '\r' or tok == ' ' or tok == '\t':
-        tok = stream.read(1)
-    return tok
-
-class ConvertFunctionsToVirtualList(object):
-    def __init__(self, lengthFunction, getFunction):
-        self.lengthFunction = lengthFunction
-        self.getFunction = getFunction
-
-    def __len__(self):
-        return self.lengthFunction()
-
-    def __getitem__(self, index):
-        if not isinstance(index, int):
-            raise TypeError, "sequence indices must be integers"
-        len_self = len(self)
-        if index < 0:
-            # support negative indexes
-            index = len_self + index
-        if index < 0 or index >= len_self:
-            raise IndexError, "sequence index out of range"
-        return self.getFunction(index)
-
-def RC4_encrypt(key, plaintext):
-    S = [i for i in range(256)]
-    j = 0
-    for i in range(256):
-        j = (j + S[i] + ord(key[i % len(key)])) % 256
-        S[i], S[j] = S[j], S[i]
-    i, j = 0, 0
-    retval = ""
-    for x in range(len(plaintext)):
-        i = (i + 1) % 256
-        j = (j + S[i]) % 256
-        S[i], S[j] = S[j], S[i]
-        t = S[(S[i] + S[j]) % 256]
-        retval += chr(ord(plaintext[x]) ^ t)
-    return retval
-
-class PdfReadError(Exception):
-    pass
-
-if __name__ == "__main__":
-    # test RC4
-    out = RC4_encrypt("Key", "Plaintext")
-    print repr(out)
-    pt = RC4_encrypt("Key", out)
-    print repr(pt)
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+"""
+Utility functions for PDF library.
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+#ENABLE_PSYCO = False
+#if ENABLE_PSYCO:
+#    try:
+#        import psyco
+#    except ImportError:
+#        ENABLE_PSYCO = False
+#
+#if not ENABLE_PSYCO:
+#    class psyco:
+#        def proxy(func):
+#            return func
+#        proxy = staticmethod(proxy)
+
+def readUntilWhitespace(stream, maxchars=None):
+    # Reads one character at a time and collects them until whitespace or
+    # end of stream is hit (that character is consumed but not returned),
+    # or until maxchars characters have been collected.
+    collected = []
+    while True:
+        char = stream.read(1)
+        if not char or char.isspace():
+            break
+        collected.append(char)
+        if len(collected) == maxchars:
+            break
+    return "".join(collected)
+
+def readNonWhitespace(stream):
+    # Skips newline, carriage return, space and tab characters and returns
+    # the first character read that is none of them (the empty string if the
+    # stream returns one).
+    skipped = ('\n', '\r', ' ', '\t')
+    char = stream.read(1)
+    while char in skipped:
+        char = stream.read(1)
+    return char
+
+class ConvertFunctionsToVirtualList(object):
+    # Read-only sequence facade: len() is answered by lengthFunction and
+    # item access by getFunction.
+    def __init__(self, lengthFunction, getFunction):
+        self.getFunction = getFunction
+        self.lengthFunction = lengthFunction
+
+    def __len__(self):
+        return self.lengthFunction()
+
+    def __getitem__(self, index):
+        if not isinstance(index, int):
+            raise TypeError("sequence indices must be integers")
+        size = len(self)
+        # support negative indexes by counting from the end
+        position = index
+        if position < 0:
+            position += size
+        if not (0 <= position < size):
+            raise IndexError("sequence index out of range")
+        return self.getFunction(position)
+
+def RC4_encrypt(key, plaintext):
+    # Key scheduling: shuffle the 256-entry state table with the key.
+    state = list(range(256))
+    keyLength = len(key)
+    j = 0
+    for i in range(256):
+        j = (j + state[i] + ord(key[i % keyLength])) % 256
+        state[i], state[j] = state[j], state[i]
+    # Output: XOR every input character with the next keystream value.
+    i = j = 0
+    pieces = []
+    for char in plaintext:
+        i = (i + 1) % 256
+        j = (j + state[i]) % 256
+        state[i], state[j] = state[j], state[i]
+        keystream = state[(state[i] + state[j]) % 256]
+        pieces.append(chr(ord(char) ^ keystream))
+    return "".join(pieces)
+
+def matrixMultiply(a, b):
+    # Row-by-column product of two matrices given as lists of rows; every
+    # entry of the result is a float.
+    product = []
+    for row in a:
+        resultRow = []
+        for col in zip(*b):
+            terms = [float(i) * float(j) for i, j in zip(row, col)]
+            resultRow.append(sum(terms))
+        product.append(resultRow)
+    return product
+
+class PyPdfError(Exception):
+    # Common base class of the exception classes defined in this module.
+    pass
+
+class PdfReadError(PyPdfError):
+    # PyPdfError subclass; presumably raised when a PDF cannot be read
+    # (raise sites are outside this module -- TODO confirm).
+    pass
+
+class PageSizeNotDefinedError(PyPdfError):
+    # PyPdfError subclass for the case where no page size is available.
+    pass
+
+if __name__ == "__main__":
+    # test RC4: encrypt a sample string, then run the result through
+    # RC4_encrypt again with the same key.  Both values are only printed
+    # (nothing is asserted); the second one is expected to be the original
+    # plaintext, since each character is XORed with the same keystream twice.
+    out = RC4_encrypt("Key", "Plaintext")
+    print repr(out)
+    pt = RC4_encrypt("Key", out)
+    print repr(pt)
--- a/bin/report/pyPdf/xmp.py
+++ b/bin/report/pyPdf/xmp.py
@ -1,356 +1,355 @@
-# -*- coding: utf-8 -*-
-import re
-import datetime
-import decimal
-from generic import PdfObject
-from xml.dom import getDOMImplementation
-from xml.dom.minidom import parseString
-
-RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
-XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
-PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
-XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
-
-# What is the PDFX namespace, you might ask?  I might ask that too.  It's
-# a completely undocumented namespace used to place "custom metadata"
-# properties, which are arbitrary metadata properties with no semantic or
-# documented meaning.  Elements in the namespace are key/value-style storage,
-# where the element name is the key and the content is the value.  The keys
-# are transformed into valid XML identifiers by substituting an invalid
-# identifier character with \u2182 followed by the unicode hex ID of the
-# original character.  A key like "my car" is therefore "my\u21820020car".
-#
-# \u2182, in case you're wondering, is the unicode character
-# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
-# escaping characters.
-#
-# Intentional users of the pdfx namespace should be shot on sight.  A
-# custom data schema and sensical XML elements could be used instead, as is
-# suggested by Adobe's own documentation on XMP (under "Extensibility of
-# Schemas").
-#
-# Information presented here on the /pdfx/ schema is a result of limited
-# reverse engineering, and does not constitute a full specification.
-PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
-
-iso8601 = re.compile("""
-        (?P<year>[0-9]{4})
-        (-
-            (?P<month>[0-9]{2})
-            (-
-                (?P<day>[0-9]+)
-                (T
-                    (?P<hour>[0-9]{2}):
-                    (?P<minute>[0-9]{2})
-                    (:(?P<second>[0-9]{2}(.[0-9]+)?))?
-                    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
-                )?
-            )?
-        )?
-        """, re.VERBOSE)
-
-##
-# An object that represents Adobe XMP metadata.
-class XmpInformation(PdfObject):
-
-    def __init__(self, stream):
-        self.stream = stream
-        docRoot = parseString(self.stream.getData())
-        self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
-        self.cache = {}
-
-    def writeToStream(self, stream, encryption_key):
-        self.stream.writeToStream(stream, encryption_key)
-
-    def getElement(self, aboutUri, namespace, name):
-        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
-            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
-                attr = desc.getAttributeNodeNS(namespace, name)
-                if attr != None:
-                    yield attr
-                for element in desc.getElementsByTagNameNS(namespace, name):
-                    yield element
-
-    def getNodesInNamespace(self, aboutUri, namespace):
-        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
-            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
-                for i in range(desc.attributes.length):
-                    attr = desc.attributes.item(i)
-                    if attr.namespaceURI == namespace:
-                        yield attr
-                for child in desc.childNodes:
-                    if child.namespaceURI == namespace:
-                        yield child
-
-    def _getText(self, element):
-        text = ""
-        for child in element.childNodes:
-            if child.nodeType == child.TEXT_NODE:
-                text += child.data
-        return text
-
-    def _converter_string(value):
-        return value
-
-    def _converter_date(value):
-        m = iso8601.match(value)
-        year = int(m.group("year"))
-        month = int(m.group("month") or "1")
-        day = int(m.group("day") or "1")
-        hour = int(m.group("hour") or "0")
-        minute = int(m.group("minute") or "0")
-        second = decimal.Decimal(m.group("second") or "0")
-        seconds = second.to_integral(decimal.ROUND_FLOOR)
-        milliseconds = (second - seconds) * 1000000
-        tzd = m.group("tzd") or "Z"
-        dt = datetime.datetime(year, month, day, hour, minute, seconds, milliseconds)
-        if tzd != "Z":
-            tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
-            tzd_hours *= -1
-            if tzd_hours < 0:
-                tzd_minutes *= -1
-            dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
-        return dt
-    _test_converter_date = staticmethod(_converter_date)
-
-    def _getter_bag(namespace, name, converter):
-        def get(self):
-            cached = self.cache.get(namespace, {}).get(name)
-            if cached:
-                return cached
-            retval = []
-            for element in self.getElement("", namespace, name):
-                bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
-                if len(bags):
-                    for bag in bags:
-                        for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
-                            value = self._getText(item)
-                            value = converter(value)
-                            retval.append(value)
-            ns_cache = self.cache.setdefault(namespace, {})
-            ns_cache[name] = retval
-            return retval
-        return get
-
-    def _getter_seq(namespace, name, converter):
-        def get(self):
-            cached = self.cache.get(namespace, {}).get(name)
-            if cached:
-                return cached
-            retval = []
-            for element in self.getElement("", namespace, name):
-                seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
-                if len(seqs):
-                    for seq in seqs:
-                        for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
-                            value = self._getText(item)
-                            value = converter(value)
-                            retval.append(value)
-                else:
-                    value = converter(self._getText(element))
-                    retval.append(value)
-            ns_cache = self.cache.setdefault(namespace, {})
-            ns_cache[name] = retval
-            return retval
-        return get
-
-    def _getter_langalt(namespace, name, converter):
-        def get(self):
-            cached = self.cache.get(namespace, {}).get(name)
-            if cached:
-                return cached
-            retval = {}
-            for element in self.getElement("", namespace, name):
-                alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
-                if len(alts):
-                    for alt in alts:
-                        for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
-                            value = self._getText(item)
-                            value = converter(value)
-                            retval[item.getAttribute("xml:lang")] = value
-                else:
-                    retval["x-default"] = converter(self._getText(element))
-            ns_cache = self.cache.setdefault(namespace, {})
-            ns_cache[name] = retval
-            return retval
-        return get
-
-    def _getter_single(namespace, name, converter):
-        def get(self):
-            cached = self.cache.get(namespace, {}).get(name)
-            if cached:
-                return cached
-            value = None
-            for element in self.getElement("", namespace, name):
-                if element.nodeType == element.ATTRIBUTE_NODE:
-                    value = element.nodeValue
-                else:
-                    value = self._getText(element)
-                break
-            if value != None:
-                value = converter(value)
-            ns_cache = self.cache.setdefault(namespace, {})
-            ns_cache[name] = value
-            return value
-        return get
-
-    ##
-    # Contributors to the resource (other than the authors).  An unsorted
-    # array of names.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
-
-    ##
-    # Text describing the extent or scope of the resource.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
-
-    ##
-    # A sorted array of names of the authors of the resource, listed in order
-    # of precedence.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
-
-    ##
-    # A sorted array of dates (datetime.datetime instances) of signifigance to
-    # the resource.  The dates and times are in UTC.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
-
-    ##
-    # A language-keyed dictionary of textual descriptions of the content of the
-    # resource.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
-
-    ##
-    # The mime-type of the resource.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
-
-    ##
-    # Unique identifier of the resource.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
-
-    ##
-    # An unordered array specifying the languages used in the resource.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
-
-    ##
-    # An unordered array of publisher names.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
-
-    ##
-    # An unordered array of text descriptions of relationships to other
-    # documents.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
-
-    ##
-    # A language-keyed dictionary of textual descriptions of the rights the
-    # user has to this resource.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
-
-    ##
-    # Unique identifier of the work from which this resource was derived.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
-
-    ##
-    # An unordered array of descriptive phrases or keywrods that specify the
-    # topic of the content of the resource.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
-
-    ##
-    # A language-keyed dictionary of the title of the resource.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
-
-    ##
-    # An unordered array of textual descriptions of the document type.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
-
-    ##
-    # An unformatted text string representing document keywords.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
-
-    ##
-    # The PDF file version, for example 1.0, 1.3.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
-
-    ##
-    # The name of the tool that created the PDF document.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
-
-    ##
-    # The date and time the resource was originally created.  The date and
-    # time are returned as a UTC datetime.datetime object.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
-    
-    ##
-    # The date and time the resource was last modified.  The date and time
-    # are returned as a UTC datetime.datetime object.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
-
-    ##
-    # The date and time that any metadata for this resource was last
-    # changed.  The date and time are returned as a UTC datetime.datetime
-    # object.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
-
-    ##
-    # The name of the first known tool used to create the resource.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
-
-    ##
-    # The common identifier for all versions and renditions of this resource.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
-
-    ##
-    # An identifier for a specific incarnation of a document, updated each
-    # time a file is saved.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
-
-    def custom_properties(self):
-        if not hasattr(self, "_custom_properties"):
-            self._custom_properties = {}
-            for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
-                key = node.localName
-                while True:
-                    # see documentation about PDFX_NAMESPACE earlier in file
-                    idx = key.find(u"\u2182")
-                    if idx == -1:
-                        break
-                    key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
-                if node.nodeType == node.ATTRIBUTE_NODE:
-                    value = node.nodeValue
-                else:
-                    value = self._getText(node)
-                self._custom_properties[key] = value
-        return self._custom_properties
-
-    ##
-    # Retrieves custom metadata properties defined in the undocumented pdfx
-    # metadata schema.
-    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
-    # @return Returns a dictionary of key/value items for custom metadata
-    # properties.
-    custom_properties = property(custom_properties)
-
-
+import re
+import datetime
+import decimal
+from generic import PdfObject
+from xml.dom import getDOMImplementation
+from xml.dom.minidom import parseString
+
+# XML namespace URIs handed to the namespace-aware DOM lookups in this file:
+# RDF for the rdf:RDF / rdf:Description / rdf:Bag|Seq|Alt / rdf:li structure,
+# the others for the dc_*, xmp_*, pdf_* and xmpmm_* properties.
+RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
+XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
+PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
+XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
+
+# What is the PDFX namespace, you might ask?  I might ask that too.  It's
+# a completely undocumented namespace used to place "custom metadata"
+# properties, which are arbitrary metadata properties with no semantic or
+# documented meaning.  Elements in the namespace are key/value-style storage,
+# where the element name is the key and the content is the value.  The keys
+# are transformed into valid XML identifiers by substituting an invalid
+# identifier character with \u2182 followed by the unicode hex ID of the
+# original character.  A key like "my car" is therefore "my\u21820020car".
+#
+# \u2182, in case you're wondering, is the unicode character
+# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
+# escaping characters.
+#
+# Intentional users of the pdfx namespace should be shot on sight.  A
+# custom data schema and sensical XML elements could be used instead, as is
+# suggested by Adobe's own documentation on XMP (under "Extensibility of
+# Schemas").
+#
+# Information presented here on the /pdfx/ schema is a result of limited
+# reverse engineering, and does not constitute a full specification.
+PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
+
+# Date/time pattern with progressively optional parts:
+# YYYY[-MM[-DD[Thh:mm[:ss[.fraction]]TZD]]], where TZD is "Z" or +hh:mm /
+# -hh:mm.  Written as a raw string with the fraction separator escaped, so
+# only a literal "." (not any character) can introduce fractional seconds.
+iso8601 = re.compile(r"""
+        (?P<year>[0-9]{4})
+        (-
+            (?P<month>[0-9]{2})
+            (-
+                (?P<day>[0-9]+)
+                (T
+                    (?P<hour>[0-9]{2}):
+                    (?P<minute>[0-9]{2})
+                    (:(?P<second>[0-9]{2}(\.[0-9]+)?))?
+                    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
+                )?
+            )?
+        )?
+        """, re.VERBOSE)
+
+##
+# An object that represents Adobe XMP metadata.
+# <p>The XML returned by the wrapped stream's getData() is parsed with
+# xml.dom.minidom when the object is constructed.  Values computed by the
+# property getters are kept in self.cache, keyed first by namespace URI and
+# then by property name.
+class XmpInformation(PdfObject):
+
+    ##
+    # @param stream Object providing getData() and writeToStream();
+    # presumably a pyPdf stream object, but the type is not checked here.
+    # Raises IndexError if the document contains no rdf:RDF element.
+    def __init__(self, stream):
+        self.stream = stream
+        docRoot = parseString(self.stream.getData())
+        self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
+        self.cache = {}
+
+    ##
+    # Writes the metadata by delegating to the wrapped stream object.
+    def writeToStream(self, stream, encryption_key):
+        self.stream.writeToStream(stream, encryption_key)
+
+    ##
+    # Generator over the nodes called "name" in "namespace" found on
+    # rdf:Description elements whose rdf:about attribute equals aboutUri:
+    # first the matching attribute node (if any), then matching elements.
+    def getElement(self, aboutUri, namespace, name):
+        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
+            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
+                attr = desc.getAttributeNodeNS(namespace, name)
+                if attr is not None:
+                    yield attr
+                for element in desc.getElementsByTagNameNS(namespace, name):
+                    yield element
+
+    ##
+    # Generator over every attribute and every direct child node that
+    # belongs to "namespace", on rdf:Description elements whose rdf:about
+    # attribute equals aboutUri.
+    def getNodesInNamespace(self, aboutUri, namespace):
+        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
+            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
+                attributes = desc.attributes
+                for i in range(attributes.length):
+                    attr = attributes.item(i)
+                    if attr.namespaceURI == namespace:
+                        yield attr
+                for child in desc.childNodes:
+                    if child.namespaceURI == namespace:
+                        yield child
+
+    ##
+    # Returns the concatenated data of the text nodes that are direct
+    # children of "element" (nested elements are not descended into).
+    def _getText(self, element):
+        return "".join([child.data for child in element.childNodes
+                        if child.nodeType == child.TEXT_NODE])
+
+    # NOTE: the _converter_* and _getter_* functions below take no "self".
+    # They are plain helpers called while the class body executes, to build
+    # the property objects further down.
+
+    ##
+    # Identity converter for plain text values.
+    def _converter_string(value):
+        return value
+
+    ##
+    # Converts a date string matching the module-level iso8601 pattern to a
+    # naive datetime.datetime expressed in UTC.  Missing month/day default
+    # to 1, missing time parts to 0, a missing zone designator to "Z".
+    # Raises AttributeError if the text does not match the pattern at all.
+    def _converter_date(value):
+        m = iso8601.match(value)
+        year = int(m.group("year"))
+        month = int(m.group("month") or "1")
+        day = int(m.group("day") or "1")
+        hour = int(m.group("hour") or "0")
+        minute = int(m.group("minute") or "0")
+        second = decimal.Decimal(m.group("second") or "0")
+        whole_seconds = second.to_integral(decimal.ROUND_FLOOR)
+        # datetime() wants plain ints, not Decimal instances.
+        microseconds = int((second - whole_seconds) * 1000000)
+        dt = datetime.datetime(year, month, day, hour, minute,
+                               int(whole_seconds), microseconds)
+        tzd = m.group("tzd") or "Z"
+        if tzd != "Z":
+            # Take the sign from the designator itself: inferring it from
+            # the hour value loses it for offsets such as "+00:30".
+            tzd_hours, tzd_minutes = [int(x) for x in tzd[1:].split(":")]
+            offset = datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
+            if tzd[0] == "-":
+                # local time is behind UTC, so UTC is later
+                dt = dt + offset
+            else:
+                dt = dt - offset
+        return dt
+    _test_converter_date = staticmethod(_converter_date)
+
+    ##
+    # Builds a getter returning a list with the converted rdf:li items of
+    # every rdf:Bag found under the nodes matching namespace/name.
+    def _getter_bag(namespace, name, converter):
+        def get(self):
+            ns_cache = self.cache.setdefault(namespace, {})
+            # Membership test rather than truthiness, so that an empty
+            # result is served from the cache as well.
+            if name in ns_cache:
+                return ns_cache[name]
+            retval = []
+            for element in self.getElement("", namespace, name):
+                for bag in element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag"):
+                    for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+                        retval.append(converter(self._getText(item)))
+            ns_cache[name] = retval
+            return retval
+        return get
+
+    ##
+    # Builds a getter returning a list with the converted rdf:li items of
+    # every rdf:Seq found under the nodes matching namespace/name.  A
+    # matching element without any rdf:Seq contributes its own text.
+    def _getter_seq(namespace, name, converter):
+        def get(self):
+            ns_cache = self.cache.setdefault(namespace, {})
+            if name in ns_cache:
+                return ns_cache[name]
+            retval = []
+            for element in self.getElement("", namespace, name):
+                seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
+                if len(seqs):
+                    for seq in seqs:
+                        for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+                            retval.append(converter(self._getText(item)))
+                else:
+                    retval.append(converter(self._getText(element)))
+            ns_cache[name] = retval
+            return retval
+        return get
+
+    ##
+    # Builds a getter returning a dictionary that maps the xml:lang
+    # attribute of each rdf:li inside an rdf:Alt to its converted text.  A
+    # matching element without any rdf:Alt is stored under "x-default".
+    def _getter_langalt(namespace, name, converter):
+        def get(self):
+            ns_cache = self.cache.setdefault(namespace, {})
+            if name in ns_cache:
+                return ns_cache[name]
+            retval = {}
+            for element in self.getElement("", namespace, name):
+                alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
+                if len(alts):
+                    for alt in alts:
+                        for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+                            retval[item.getAttribute("xml:lang")] = converter(self._getText(item))
+                else:
+                    retval["x-default"] = converter(self._getText(element))
+            ns_cache[name] = retval
+            return retval
+        return get
+
+    ##
+    # Builds a getter returning the converted value of the first node
+    # matching namespace/name (attribute value or element text), or None
+    # when there is no such node.
+    def _getter_single(namespace, name, converter):
+        def get(self):
+            ns_cache = self.cache.setdefault(namespace, {})
+            if name in ns_cache:
+                return ns_cache[name]
+            value = None
+            for element in self.getElement("", namespace, name):
+                if element.nodeType == element.ATTRIBUTE_NODE:
+                    value = element.nodeValue
+                else:
+                    value = self._getText(element)
+                break  # only the first match is used
+            if value is not None:
+                value = converter(value)
+            ns_cache[name] = value
+            return value
+        return get
+
+    ##
+    # Contributors to the resource (other than the authors).  An unsorted
+    # array of names.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
+
+    ##
+    # Text describing the extent or scope of the resource.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
+
+    ##
+    # A sorted array of names of the authors of the resource, listed in order
+    # of precedence.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
+
+    ##
+    # A sorted array of dates (datetime.datetime instances) of significance to
+    # the resource.  The dates and times are in UTC.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
+
+    ##
+    # A language-keyed dictionary of textual descriptions of the content of the
+    # resource.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
+
+    ##
+    # The mime-type of the resource.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
+
+    ##
+    # Unique identifier of the resource.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
+
+    ##
+    # An unordered array specifying the languages used in the resource.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
+
+    ##
+    # An unordered array of publisher names.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
+
+    ##
+    # An unordered array of text descriptions of relationships to other
+    # documents.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
+
+    ##
+    # A language-keyed dictionary of textual descriptions of the rights the
+    # user has to this resource.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
+
+    ##
+    # Unique identifier of the work from which this resource was derived.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
+
+    ##
+    # An unordered array of descriptive phrases or keywords that specify the
+    # topic of the content of the resource.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
+
+    ##
+    # A language-keyed dictionary of the title of the resource.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
+
+    ##
+    # An unordered array of textual descriptions of the document type.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
+
+    ##
+    # An unformatted text string representing document keywords.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
+
+    ##
+    # The PDF file version, for example 1.0, 1.3.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
+
+    ##
+    # The name of the tool that created the PDF document.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
+
+    ##
+    # The date and time the resource was originally created.  The date and
+    # time are returned as a UTC datetime.datetime object.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
+
+    ##
+    # The date and time the resource was last modified.  The date and time
+    # are returned as a UTC datetime.datetime object.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
+
+    ##
+    # The date and time that any metadata for this resource was last
+    # changed.  The date and time are returned as a UTC datetime.datetime
+    # object.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
+
+    ##
+    # The name of the first known tool used to create the resource.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
+
+    ##
+    # The common identifier for all versions and renditions of this resource.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
+
+    ##
+    # An identifier for a specific incarnation of a document, updated each
+    # time a file is saved.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
+
+    def custom_properties(self):
+        if not hasattr(self, "_custom_properties"):
+            try:
+                to_char = unichr  # Python 2: chr() only covers 0..255
+            except NameError:
+                to_char = chr
+            props = {}
+            for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
+                key = node.localName
+                start = 0
+                while True:
+                    # see documentation about PDFX_NAMESPACE earlier in file
+                    idx = key.find(u"\u2182", start)
+                    if idx == -1:
+                        break
+                    key = key[:idx] + to_char(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
+                    # Resume after the decoded character, so that a decoded
+                    # U+2182 is not mistaken for another escape marker.
+                    start = idx + 1
+                if node.nodeType == node.ATTRIBUTE_NODE:
+                    value = node.nodeValue
+                else:
+                    value = self._getText(node)
+                props[key] = value
+            # Stored only once complete, so a failure part-way through does
+            # not leave a half-filled dictionary cached on the instance.
+            self._custom_properties = props
+        return self._custom_properties
+
+    ##
+    # Retrieves custom metadata properties defined in the undocumented pdfx
+    # metadata schema.
+    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
+    # @return Returns a dictionary of key/value items for custom metadata
+    # properties.
+    custom_properties = property(custom_properties)
+
+