odoo/addons/base_report_designer/wizard/tiny_sxw2rml/tiny_sxw2rml.py

364 lines
14 KiB
Python

# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c):
#
# 2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
# 2005 Fabien Pinckaers, TINY SPRL. (http://tiny.be)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
#!/usr/bin/python
"""
Tiny SXW2RML - The Open ERP's report engine
Tiny SXW2RMLis part of the Tiny report project.
Tiny Report is a module that allows you to render high quality PDF document
from an OpenOffice template (.sxw) and any relationnal database.
The whole source code is distributed under the terms of the
GNU Public Licence.
(c) 2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
(c) 2005-TODAY, Fabien Pinckaers - Tiny sprl
"""
__version__ = '0.9'
import re
import string
import os
import zipfile
import xml.dom.minidom
from reportlab.lib.units import toLength
import base64
class DomApiGeneral:
"""General DOM API utilities."""
def __init__(self,content_string="",file=""):
self.content_string = content_string
self.re_digits = re.compile(r"(.*?\d)(pt|cm|mm|inch|in)")
def _unitTuple(self,string):
"""Split values and units to a tuple."""
temp = self.re_digits.findall(string)
if not temp:
return (string,"")
else:
return (temp[0])
def stringPercentToFloat(self,string):
temp = string.replace("""%""","")
return float(temp)/100
def findChildrenByName(self,parent,name,attr_dict={}):
"""Helper functions. Does not work recursively.
Optional: also test for certain attribute/value pairs."""
children = []
for c in parent.childNodes:
if c.nodeType == c.ELEMENT_NODE and c.nodeName == name:
children.append(c)
if attr_dict == {}:
return children
else:
return self._selectForAttributes(nodelist=children,attr_dict=attr_dict)
def _selectForAttributes(self,nodelist,attr_dict):
"Helper function."""
selected_nodes = []
for n in nodelist:
check = 1
for a in attr_dict.keys():
if n.getAttribute(a) != attr_dict[a]:
# at least one incorrect attribute value?
check = 0
if check:
selected_nodes.append(n)
return selected_nodes
def _stringToTuple(self,s):
"""Helper function."""
try:
temp = string.split(s,",")
return int(temp[0]),int(temp[1])
except:
return None
def _tupleToString(self,t):
try:
return self.openOfficeStringUtf8("%s,%s" % (t[0],t[1]))
except:
return None
def _lengthToFloat(self,value):
v = value
if not self.re_digits.search(v):
return v
try:
if v[-4:] == "inch":
# OO files use "inch" instead of "in" in Reportlab units
v = v[:-2]
except:
pass
try:
c = round(toLength(v))
return c
except:
return v
def openOfficeStringUtf8(self,string):
if type(string) == unicode:
return string.encode("utf-8")
tempstring = unicode(string,"cp1252").encode("utf-8")
return tempstring
class DomApi(DomApiGeneral):
"""This class provides a DOM-API for XML-Files from an SXW-Archive."""
def __init__(self,xml_content,xml_styles):
DomApiGeneral.__init__(self)
self.content_dom = xml.dom.minidom.parseString(xml_content)
self.styles_dom = xml.dom.minidom.parseString(xml_styles)
body = self.content_dom.getElementsByTagName("office:body")
self.body = body and body[0]
# TODO:
self.style_dict = {}
self.style_properties_dict = {}
# ******** always use the following order:
self.buildStyleDict()
self.buildStylePropertiesDict()
if self.styles_dom.getElementsByTagName("style:page-master").__len__()<>0:
self.page_master = self.styles_dom.getElementsByTagName("style:page-master")[0]
if self.styles_dom.getElementsByTagName("style:page-layout").__len__()<>0 :
self.page_master = self.styles_dom.getElementsByTagName("style:page-layout")[0]
self.document = self.content_dom.getElementsByTagName("office:document-content")[0]
def buildStylePropertiesDict(self):
for s in self.style_dict.keys():
self.style_properties_dict[s] = self.getStylePropertiesDict(s)
def updateWithPercents(self,dict,updatedict):
"""Sometimes you find values like "115%" in the style hierarchy."""
if not updatedict:
# no style hierarchies for this style? =>
return
new_updatedict = copy.copy(updatedict)
for u in new_updatedict.keys():
try:
if new_updatedict[u].find("""%""") != -1 and dict.has_key(u):
number = float(self.re_digits.search(dict[u]).group(1))
unit = self.re_digits.search(dict[u]).group(2)
new_number = self.stringPercentToFloat(new_updatedict[u]) * number
if unit == "pt":
new_number = int(new_number)
# no floats allowed for "pt"
# OOo just takes the int, does not round (try it out!)
new_updatedict[u] = "%s%s" % (new_number,unit)
else:
dict[u] = new_updatedict[u]
except:
dict[u] = new_updatedict[u]
dict.update(new_updatedict)
def normalizeStyleProperties(self):
"""Transfer all style:style-properties attributes from the
self.style_properties_hierarchical dict to the automatic-styles
from content.xml. Use this function to preprocess content.xml for
XSLT transformations etc.Do not try to implement this function
with XSlT - believe me, it's a terrible task..."""
styles_styles = self.styles_dom.getElementsByTagName("style:style")
automatic_styles = self.content_dom.getElementsByTagName("office:automatic-styles")[0]
for s in styles_styles:
automatic_styles.appendChild(s.cloneNode(deep=1))
content_styles = self.content_dom.getElementsByTagName("style:style")
# these are the content_styles with styles_styles added!!!
for s in content_styles:
c = self.findChildrenByName(s,"style:properties")
if c == []:
# some derived automatic styles do not have "style:properties":
temp = self.content_dom.createElement("style:properties")
s.appendChild(temp)
c = self.findChildrenByName(s,"style:properties")
c = c[0]
dict = self.style_properties_dict[(s.getAttribute("style:name")).encode("utf-8")] or {}
for attribute in dict.keys():
c.setAttribute(self.openOfficeStringUtf8(attribute),self.openOfficeStringUtf8(dict[attribute]))
def transferStylesXml(self):
"""Transfer certain sub-trees from styles.xml to the normalized content.xml
(see above). It is not necessary to do this - for example - with paragraph styles.
the "normalized" style properties contain all information needed for
further processing."""
# TODO: What about table styles etc.?
outline_styles = self.styles_dom.getElementsByTagName("text:outline-style")
t = self.content_dom.createElement("transferredfromstylesxml")
self.document.insertBefore(t,self.body)
t_new = self.body.previousSibling
try:
page_master = self.page_master
t_new.appendChild(page_master.cloneNode(deep=1))
t_new.appendChild(outline_styles[0].cloneNode(deep=1))
except:
pass
def normalizeLength(self):
"""Normalize all lengthes to floats (i.e: 1 inch = 72).
Always use this after "normalizeContent" and "transferStyles"!"""
# TODO: The complex attributes of table cell styles are not transferred yet.
#all_styles = self.content_dom.getElementsByTagName("style:properties")
#all_styles += self.content_dom.getElementsByTagName("draw:image")
all_styles = self.content_dom.getElementsByTagName("*")
for s in all_styles:
for x in s._attrs.keys():
v = s.getAttribute(x)
s.setAttribute(x,"%s" % self._lengthToFloat(v))
# convert float to string first!
def normalizeTableColumns(self):
"""Handle this strange table:number-columns-repeated attribute."""
columns = self.content_dom.getElementsByTagName("table:table-column")
for c in columns:
if c.hasAttribute("table:number-columns-repeated"):
number = int(c.getAttribute("table:number-columns-repeated"))
c.removeAttribute("table:number-columns-repeated")
for i in range(number-1):
(c.parentNode).insertBefore(c.cloneNode(deep=1),c)
def buildStyleDict(self):
"""Store all style:style-nodes from content.xml and styles.xml in self.style_dict.
Caution: in this dict the nodes from two dom apis are merged!"""
for st in (self.styles_dom,self.content_dom):
for s in st.getElementsByTagName("style:style"):
name = s.getAttribute("style:name").encode("utf-8")
self.style_dict[name] = s
return True
def toxml(self):
return self.content_dom.toxml(encoding="utf-8")
def getStylePropertiesDict(self,style_name):
res = {}
if self.style_dict[style_name].hasAttribute("style:parent-style-name"):
parent = self.style_dict[style_name].getAttribute("style:parent-style-name").encode("utf-8")
res = self.getStylePropertiesDict(parent)
children = self.style_dict[style_name].childNodes
for c in children:
if c.nodeType == c.ELEMENT_NODE and c.nodeName.find("properties")>0 :
for attr in c._attrs.keys():
res[attr] = c.getAttribute(attr).encode("utf-8")
return res
class PyOpenOffice(object):
"""This is the main class which provides all functionality."""
def __init__(self, path='.', save_pict=False):
self.path = path
self.save_pict = save_pict
self.images = {}
def oo_read(self,fname):
z = zipfile.ZipFile(fname,"r")
content = z.read('content.xml')
style = z.read('styles.xml')
all = z.namelist()
for a in all:
if a[:9]=='Pictures/' and len(a)>10:
pic_content = z.read(a)
self.images[a[9:]] = pic_content
if self.save_pict:
f=open(os.path.join(self.path, os.path.basename(a)),"wb")
f.write(pic_content)
f.close()
z.close()
return content,style
def oo_replace(self,content):
regex = [
(r"<para[^>]*/>", ""),
#(r"<text:ordered-list.*?>(.*?)</text:ordered-list>", "$1"),
#(r"<text:unordered-list.*?>(.*?)</text:unordered-list>", "$1"),
(r"<para(.*)>(.*?)<text:line-break[^>]*/>", "<para$1>$2</para><para$1>"),
]
for key,val in regex:
content = re.sub(key, val, content)
return content
def unpackNormalize(self,sourcefile):
c,s = self.oo_read(sourcefile)
c = self.oo_replace(c)
dom = DomApi(c,s)
dom.normalizeStyleProperties()
dom.transferStylesXml()
dom.normalizeLength()
dom.normalizeTableColumns()
new_c = dom.toxml()
return new_c
def sxw2rml(sxw_file, xsl, output='.', save_pict=False):
import libxslt
import libxml2
tool = PyOpenOffice(output, save_pict = save_pict)
res = tool.unpackNormalize(sxw_file)
styledoc = libxml2.parseDoc(xsl)
style = libxslt.parseStylesheetDoc(styledoc)
doc = libxml2.parseMemory(res,len(res))
result = style.applyStylesheet(doc, None)
root = result.xpathEval("/document/stylesheet")
if root:
root=root[0]
images = libxml2.newNode("images")
for img in tool.images:
node = libxml2.newNode('image')
node.setProp('name', img)
node.setContent( base64.encodestring(tool.images[img]))
images.addChild(node)
root.addNextSibling(images)
try:
xml = style.saveResultToString(result)
return xml
except:
return result
if __name__ == "__main__":
import optparse
parser = optparse.OptionParser(
version="Tiny Report v%s" % __version__,
usage = 'tiny_sxw2rml.py [options] file.sxw')
parser.add_option("-v", "--verbose", default=False, dest="verbose", help="enable basic debugging")
parser.add_option("-o", "--output", dest="output", default='.', help="directory of image output")
(opt, args) = parser.parse_args()
if len(args) != 1:
parser.error("incorrect number of arguments")
import sys
import StringIO
fname = sys.argv[1]
f = StringIO.StringIO(file(fname).read())
xsl_file = 'normalized_oo2rml.xsl'
z = zipfile.ZipFile(fname,"r")
mimetype = z.read('mimetype')
if mimetype.split('/')[-1] == 'vnd.oasis.opendocument.text' :
xsl_file = 'normalized_odt2rml.xsl'
xsl = file(os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), xsl_file)).read()
result = sxw2rml(f, xsl, output=opt.output, save_pict=False)
print result
# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: