diff --git a/openerp/addons/base/base_data.xml b/openerp/addons/base/base_data.xml
index 071d4cd0618..7c136a5752f 100644
--- a/openerp/addons/base/base_data.xml
+++ b/openerp/addons/base/base_data.xml
@@ -77,7 +77,8 @@
- Administrator
+ --
+Administrator
diff --git a/openerp/addons/base/base_demo.xml b/openerp/addons/base/base_demo.xml
index cb9c836e0a6..89176d1811b 100644
--- a/openerp/addons/base/base_demo.xml
+++ b/openerp/addons/base/base_demo.xml
@@ -11,7 +11,8 @@
demo
demo
- Mr Demo
+ --
+Mr Demo
diff --git a/openerp/addons/base/i18n/fa.po b/openerp/addons/base/i18n/fa.po
index ed79ae62a70..347abf02cbf 100644
--- a/openerp/addons/base/i18n/fa.po
+++ b/openerp/addons/base/i18n/fa.po
@@ -9,7 +9,7 @@ msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2012-11-13 05:11+0000\n"
+"X-Launchpad-Export-Date: 2012-11-14 04:40+0000\n"
"X-Generator: Launchpad (build 16251)\n"
"X-Poedit-Country: IRAN, ISLAMIC REPUBLIC OF\n"
"X-Poedit-Language: Persian\n"
diff --git a/openerp/addons/base/res/res_users.py b/openerp/addons/base/res/res_users.py
index f272b3712c6..f975d39b300 100644
--- a/openerp/addons/base/res/res_users.py
+++ b/openerp/addons/base/res/res_users.py
@@ -256,8 +256,9 @@ class res_users(osv.osv):
- else: the default view is overrided and redirected to the partner
view
"""
- if not view_id and view_type == 'form':
- return self.pool.get('res.partner').fields_view_get(cr, uid, view_id, view_type, context, toolbar, submenu)
+ # Disabled: redirecting to the res.partner form view made a lot of views crash, because the OpenChatter methods are not available on res.users.
+ #if not view_id and view_type == 'form':
+ # return self.pool.get('res.partner').fields_view_get(cr, uid, view_id, view_type, context, toolbar, submenu)
return super(res_users, self).fields_view_get(cr, uid, view_id, view_type, context, toolbar, submenu)
# User can write on a few of his own fields (but not his groups for example)
diff --git a/openerp/modules/loading.py b/openerp/modules/loading.py
index 200f144d595..c3d73859730 100644
--- a/openerp/modules/loading.py
+++ b/openerp/modules/loading.py
@@ -112,6 +112,7 @@ def load_module_graph(cr, graph, status=None, perform_checks=True, skip_modules=
if kind in ('demo', 'demo_xml'):
noupdate = True
try:
+ ext = ext.lower()
if ext == '.csv':
if kind in ('init', 'init_xml'):
noupdate = True
@@ -120,8 +121,12 @@ def load_module_graph(cr, graph, status=None, perform_checks=True, skip_modules=
process_sql_file(cr, fp)
elif ext == '.yml':
tools.convert_yaml_import(cr, module_name, fp, kind, idref, mode, noupdate, report)
- else:
+ elif ext == '.xml':
tools.convert_xml_import(cr, module_name, fp, idref, mode, noupdate, report)
+ elif ext == '.js':
+ pass # .js files are valid but ignored here.
+ else:
+ _logger.warning("Can't load unknown file type %s.", filename)
finally:
fp.close()
diff --git a/openerp/modules/module.py b/openerp/modules/module.py
index 7134d96b644..e2e3dbfc8fa 100644
--- a/openerp/modules/module.py
+++ b/openerp/modules/module.py
@@ -28,15 +28,9 @@ import sys
import types
import zipimport
-import openerp
-
-import openerp.osv as osv
import openerp.tools as tools
import openerp.tools.osutil as osutil
from openerp.tools.safe_eval import safe_eval as eval
-from openerp.tools.translate import _
-
-import openerp.netsvc as netsvc
import zipfile
import openerp.release as release
@@ -48,9 +42,6 @@ from cStringIO import StringIO
import logging
-import openerp.modules.db
-import openerp.modules.graph
-
_logger = logging.getLogger(__name__)
_ad = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'addons') # default addons path (base)
@@ -335,7 +326,6 @@ def load_information_from_description_file(module):
'description': '',
'icon': get_module_icon(module),
'installable': True,
- 'auto_install': False,
'license': 'AGPL-3',
'name': False,
'post_load': None,
diff --git a/openerp/osv/fields.py b/openerp/osv/fields.py
index 2d51ad7d6db..0d12a9a8bc3 100644
--- a/openerp/osv/fields.py
+++ b/openerp/osv/fields.py
@@ -44,8 +44,8 @@ import openerp
import openerp.tools as tools
from openerp.tools.translate import _
from openerp.tools import float_round, float_repr
+from openerp.tools import html_sanitize
import simplejson
-from openerp.tools.html_sanitize import html_sanitize
from openerp import SUPERUSER_ID
_logger = logging.getLogger(__name__)
diff --git a/openerp/service/web_services.py b/openerp/service/web_services.py
index 5e01ad56105..57eb434100d 100644
--- a/openerp/service/web_services.py
+++ b/openerp/service/web_services.py
@@ -112,7 +112,7 @@ class db(netsvc.ExportService):
if method in [ 'create', 'get_progress', 'drop', 'dump',
'restore', 'rename',
'change_admin_password', 'migrate_databases',
- 'create_database' ]:
+ 'create_database', 'duplicate_database' ]:
passwd = params[0]
params = params[1:]
security.check_super(passwd)
@@ -161,11 +161,22 @@ class db(netsvc.ExportService):
self.actions[id] = {'clean': False}
- _logger.info('CREATE DATABASE %s', db_name.lower())
+ _logger.info('Create database `%s`.', db_name)
self._create_empty_database(db_name)
_initialize_db(self, id, db_name, demo, lang, user_password)
return True
+ def exp_duplicate_database(self, db_original_name, db_name):
+ _logger.info('Duplicate database `%s` to `%s`.', db_original_name, db_name)
+ db = sql_db.db_connect('postgres')
+ cr = db.cursor()
+ try:
+ cr.autocommit(True) # avoid transaction block
+ cr.execute("""CREATE DATABASE "%s" ENCODING 'unicode' TEMPLATE "%s" """ % (db_name, db_original_name))
+ finally:
+ cr.close()
+ return True
+
def exp_get_progress(self, id):
if self.actions[id]['thread'].isAlive():
# return openerp.modules.init_progress[db_name]
diff --git a/openerp/tests/__init__.py b/openerp/tests/__init__.py
index 65277218da3..d036ccfc751 100644
--- a/openerp/tests/__init__.py
+++ b/openerp/tests/__init__.py
@@ -8,7 +8,7 @@ Tests can be explicitely added to the `fast_suite` or `checks` lists or not.
See the :ref:`test-framework` section in the :ref:`features` list.
"""
-from . import test_expression, test_html_sanitize, test_ir_sequence, test_orm,\
+from . import test_expression, test_mail, test_ir_sequence, test_orm,\
test_fields, test_basecase, \
test_view_validation, test_uninstall, test_misc, test_db_cursor
from . import test_ir_filters
@@ -20,7 +20,7 @@ fast_suite = [
checks = [
test_expression,
- test_html_sanitize,
+ test_mail,
test_db_cursor,
test_orm,
test_fields,
diff --git a/openerp/tests/test_html_sanitize.py b/openerp/tests/test_html_sanitize.py
deleted file mode 100755
index cb46b325144..00000000000
--- a/openerp/tests/test_html_sanitize.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import unittest
-from openerp.tools.html_sanitize import html_sanitize
-
-test_case = """
-test1
-
-test2
-test3
-test4
-test5
-test6
-
-
-test12
-google
-test link
-"""
-
-class TestSanitizer(unittest.TestCase):
-
- def test_simple(self):
- x = "yop"
- self.assertEqual(x, html_sanitize(x))
-
- def test_trailing_text(self):
- x = 'lalayop
xxx'
- self.assertEqual(x, html_sanitize(x))
-
- def test_no_exception(self):
- html_sanitize(test_case)
-
- def test_unicode(self):
- html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci")
-
-if __name__ == '__main__':
- unittest.main()
\ No newline at end of file
diff --git a/openerp/tests/test_mail.py b/openerp/tests/test_mail.py
new file mode 100644
index 00000000000..0ad6504a7ae
--- /dev/null
+++ b/openerp/tests/test_mail.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# This test can be run stand-alone with something like:
+# > PYTHONPATH=. python2 openerp/tests/test_mail.py
+##############################################################################
+#
+# OpenERP, Open Source Business Applications
+# Copyright (c) 2012-TODAY OpenERP S.A.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+##############################################################################
+
+import unittest2
+from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html
+
+HTML_SOURCE = """
+test1
+
+test2
+test3
+test4
+test5
+test6
+
+
+test12
+google
+test link
+"""
+
+TEXT_MAIL1 = """I contact you about our meeting for tomorrow. Here is the schedule I propose:
+9 AM: brainstorming about our new amazing business app
+9.45 AM: summary
+10 AM: meeting with Fabien to present our app
+Is everything ok for you ?
+--
+Administrator"""
+
+HTML_MAIL1 = """
+I contact you about our meeting for tomorrow. Here is the schedule I propose:
+
+
+- 9 AM: brainstorming about our new amazing business app
+- 9.45 AM: summary
+- 10 AM: meeting with Fabien to present our app
+
+Is everything ok for you ?
"""
+
+GMAIL_REPLY1_SAN = """Hello,
Ok for me. I am replying directly in gmail, without signature.
Kind regards,
Demo.
On Thu, Nov 8, 2012 at 5:29 PM,
<dummy@example.com> wrote:
I contact you about our meeting for tomorrow. Here is the schedule I propose:
- 9 AM: brainstorming about our new amazing business app</span></li>
+- 9.45 AM: summary
- 10 AM: meeting with Fabien to present our app
Is everything ok for you ?
+
+
+
+
"""
+
+THUNDERBIRD_16_REPLY1_SAN = """
+
+ I contact you about our meeting for tomorrow. Here is the
+ schedule I propose:
+
+
- 9 AM: brainstorming about our new amazing business
+ app</span></li>
+ - 9.45 AM: summary
+ - 10 AM: meeting with Fabien to present our app
+
+ Is everything ok for you ?
+
+
+
+ Ok for me. I am replying directly below your mail, using
+ Thunderbird, with a signature.
+ Did you receive my email about my new laptop, by the way ?
+ Raoul.
--
+Raoul Grosbedonnée
+
"""
+
+TEXT_TPL = """Salut Raoul!
+Le 28 oct. 2012 à 00:02, Raoul Grosbedon a écrit :
+
+> C'est sûr que je suis intéressé (quote)!
+
+Trouloulou pouet pouet. Je ne vais quand même pas écrire de vrais mails, non mais ho.
+
+> 2012/10/27 Bert Tartopoils :
+>> Diantre, me disè-je en envoyant un message similaire à Martine, mais comment vas-tu (quote)?
+>>
+>> A la base le contenu était un vrai mail, mais je l'ai quand même réécrit pour ce test, histoire de dire que, quand même, on ne met pas n'importe quoi ici. (quote)
+>>
+>> Et sinon bon courage pour trouver tes clefs (quote).
+>>
+>> Bert TARTOPOILS
+>> bert.tartopoils@miam.miam
+>>
+>
+>
+> --
+> Raoul Grosbedon
+
+Bert TARTOPOILS
+bert.tartopoils@miam.miam
+"""
+
+
+class TestSanitizer(unittest2.TestCase):
+ """ Test the html sanitizer that filters html to remove unwanted attributes """
+
+ def test_simple(self):
+ x = "yop"
+ self.assertEqual(x, html_sanitize(x))
+
+ def test_trailing_text(self):
+ x = 'lalayop
xxx'
+ self.assertEqual(x, html_sanitize(x))
+
+ def test_html(self):
+ sanitized_html = html_sanitize(HTML_SOURCE)
+ for tag in ['', '"),
+ ("FirstIt should be escaped
\nSignature", False,
+ "First<p>It should be escaped</p>
Signature
")
+ ]
+ for content, container_tag, expected in cases:
+ html = plaintext2html(content, container_tag)
+ self.assertEqual(html, expected, 'plaintext2html is broken')
+
+ def test_append_to_html(self):
+ test_samples = [
+ ('some content', '--\nYours truly', True, True, False,
+ 'some content\n--\nYours truly
\n'),
+ ('some content', '--\nYours truly', True, False, False,
+ 'some content\n--
Yours truly
\n'),
+ ('some content', '\n\n--
\nYours truly
\n\n', False, False, False,
+ 'some content\n\n\n--
\nYours truly
\n\n\n'),
+ ]
+ for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
+ self.assertEqual(append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag), expected, 'append_content_to_html is broken')
+
+
+if __name__ == '__main__':
+ unittest2.main()
diff --git a/openerp/tests/test_misc.py b/openerp/tests/test_misc.py
index 54ae69899a6..9540c0222f3 100644
--- a/openerp/tests/test_misc.py
+++ b/openerp/tests/test_misc.py
@@ -4,18 +4,6 @@
import unittest2
from ..tools import misc
-class append_content_to_html(unittest2.TestCase):
- """ Test some of our generic utility functions """
-
- def test_append_to_html(self):
- test_samples = [
- ('some content', '--\nYours truly', True,
- 'some content\n--\nYours truly
\n'),
- ('some content', '\n\n--
\nYours truly
\n\n', False,
- 'some content\n\n\n--
\nYours truly
\n\n\n'),
- ]
- for html, content, flag, expected in test_samples:
- self.assertEqual(misc.append_content_to_html(html,content,flag), expected, 'append_content_to_html is broken')
class test_countingstream(unittest2.TestCase):
def test_empty_stream(self):
diff --git a/openerp/tools/__init__.py b/openerp/tools/__init__.py
index a0ca411a9df..af14189bbae 100644
--- a/openerp/tools/__init__.py
+++ b/openerp/tools/__init__.py
@@ -33,7 +33,7 @@ from pdf_utils import *
from yaml_import import *
from sql import *
from float_utils import *
-from html_sanitize import *
+from mail import *
#.apidoc title: Tools
diff --git a/openerp/tools/html_sanitize.py b/openerp/tools/html_sanitize.py
deleted file mode 100644
index 6ea7b90e2ba..00000000000
--- a/openerp/tools/html_sanitize.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-##############################################################################
-#
-# OpenERP, Open Source Business Applications
-# Copyright (C) 2012 OpenERP S.A. ().
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-#
-##############################################################################
-
-import lxml.html
-import operator
-import re
-
-from openerp.loglevels import ustr
-
-def html_sanitize(src):
- if not src:
- return src
- src = ustr(src, errors='replace')
- root = lxml.html.fromstring(u"%s
" % src)
- result = handle_element(root)
- res = []
- for element in children(result[0]):
- if isinstance(element, basestring):
- res.append(element)
- else:
- element.tail = ""
- res.append(lxml.html.tostring(element))
- return ''.join(res)
-
-# FIXME: shouldn't this be a whitelist rather than a blacklist?!
-to_remove = set(["script", "head", "meta", "title", "link", "img"])
-to_unwrap = set(["html", "body"])
-
-javascript_regex = re.compile(r"^\s*javascript\s*:.*$", re.IGNORECASE)
-
-def handle_a(el, new):
- href = el.get("href", "#")
- if javascript_regex.search(href):
- href = "#"
- new.set("href", href)
-
-special = {
- "a": handle_a,
-}
-
-def handle_element(element):
- if isinstance(element, basestring):
- return [element]
- if element.tag in to_remove:
- return []
- if element.tag in to_unwrap:
- return reduce(operator.add, [handle_element(x) for x in children(element)])
- result = lxml.html.fromstring("<%s />" % element.tag)
- for c in children(element):
- append_to(handle_element(c), result)
- if element.tag in special:
- special[element.tag](element, result)
- return [result]
-
-def children(node):
- res = []
- if node.text is not None:
- res.append(node.text)
- for child_node in node.getchildren():
- res.append(child_node)
- if child_node.tail is not None:
- res.append(child_node.tail)
- return res
-
-def append_to(elements, dest_node):
- for element in elements:
- if isinstance(element, basestring):
- children = dest_node.getchildren()
- if len(children) == 0:
- dest_node.text = element
- else:
- children[-1].tail = element
- else:
- dest_node.append(element)
diff --git a/openerp/tools/image.py b/openerp/tools/image.py
index e2b14f18d0d..eb3600a6652 100644
--- a/openerp/tools/image.py
+++ b/openerp/tools/image.py
@@ -20,11 +20,10 @@
##############################################################################
import io
-import sys
import StringIO
from PIL import Image
-from PIL import ImageFilter
+from PIL import ImageEnhance
from random import random
# ----------------------------------------
@@ -67,18 +66,15 @@ def image_resize_image(base64_source, size=(1024, 1024), encoding='base64', file
# check image size: do not create a thumbnail if avoiding smaller images
if avoid_if_small and image.size[0] <= size[0] and image.size[1] <= size[1]:
return base64_source
-
- if (float(image.size[0])/image.size[1]) > (float(size[0]) / size[1]):
- ibox = (size[1] * image.size[0] / image.size[1] , size[1])
- deltax = max((size[1] * image.size[0] / image.size[1] - size[0]) / 2, 0)
- deltay = 0
- else:
- ibox = (size[0],size[0] * image.size[1] / image.size[0])
- deltax = 0
- deltay = max((size[0] * image.size[1] / image.size[0] - size[1]) / 2, 0)
-
- im2 = image.resize(ibox, Image.ANTIALIAS)
- background = im2.crop((deltax, deltay, deltax+size[0], deltay+size[1]))
+ # create a thumbnail: will resize and keep ratios, then sharpen for better looking result
+ image.thumbnail(size, Image.ANTIALIAS)
+ sharpener = ImageEnhance.Sharpness(image.convert('RGB'))
+ image = sharpener.enhance(2.0)
+ # create a transparent image for background
+ background = Image.new('RGBA', size, (255, 255, 255, 0))
+ # paste the resized image onto the background
+ background.paste(image, ((size[0] - image.size[0]) / 2, (size[1] - image.size[1]) / 2))
+ # return an encoded image
background_stream = StringIO.StringIO()
background.save(background_stream, filetype)
return background_stream.getvalue().encode(encoding)
diff --git a/openerp/tools/mail.py b/openerp/tools/mail.py
new file mode 100644
index 00000000000..39d918da97b
--- /dev/null
+++ b/openerp/tools/mail.py
@@ -0,0 +1,387 @@
+# -*- coding: utf-8 -*-
+##############################################################################
+#
+# OpenERP, Open Source Business Applications
+# Copyright (C) 2012 OpenERP S.A. ().
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+##############################################################################
+
+from lxml import etree
+import cgi
+import logging
+import lxml.html
+import openerp.pooler as pooler
+import operator
+import random
+import re
+import socket
+import threading
+import time
+
+from openerp.loglevels import ustr
+
+_logger = logging.getLogger(__name__)
+
+
+#----------------------------------------------------------
+# HTML Sanitizer
+#----------------------------------------------------------
+
+def html_sanitize(src):
+ if not src:
+ return src
+ src = ustr(src, errors='replace')
+ root = lxml.html.fromstring(u"%s
" % src)
+ result = handle_element(root)
+ res = []
+ for element in children(result[0]):
+ if isinstance(element, basestring):
+ res.append(element)
+ else:
+ element.tail = ""
+ res.append(lxml.html.tostring(element))
+ return ''.join(res)
+
+# FIXME: shouldn't this be a whitelist rather than a blacklist?!
+to_remove = set(["script", "head", "meta", "title", "link", "img"])
+to_unwrap = set(["html", "body"])
+
+javascript_regex = re.compile(r"^\s*javascript\s*:.*$", re.IGNORECASE)
+
+def handle_a(el, new):
+ href = el.get("href", "#")
+ if javascript_regex.search(href):
+ href = "#"
+ new.set("href", href)
+
+special = {
+ "a": handle_a,
+}
+
+def handle_element(element):
+ if isinstance(element, basestring):
+ return [element]
+ if element.tag in to_remove:
+ return []
+ if element.tag in to_unwrap:
+ return reduce(operator.add, [handle_element(x) for x in children(element)])
+ result = lxml.html.fromstring("<%s />" % element.tag)
+ for c in children(element):
+ append_to(handle_element(c), result)
+ if element.tag in special:
+ special[element.tag](element, result)
+ return [result]
+
+def children(node):
+ res = []
+ if node.text is not None:
+ res.append(node.text)
+ for child_node in node.getchildren():
+ res.append(child_node)
+ if child_node.tail is not None:
+ res.append(child_node.tail)
+ return res
+
+def append_to(elements, dest_node):
+ for element in elements:
+ if isinstance(element, basestring):
+ children = dest_node.getchildren()
+ if len(children) == 0:
+ dest_node.text = element
+ else:
+ children[-1].tail = element
+ else:
+ dest_node.append(element)
+
+
+#----------------------------------------------------------
+# HTML Cleaner
+#----------------------------------------------------------
+
+def html_email_clean(html):
+ """ html_email_clean: clean the html to display in the web client.
+ - strip email quotes (remove blockquote nodes)
+ - strip signatures (remove --\n{\n)Blahblah), by replacing <br> by
+ \n to avoid ignoring signatures converted into html
+
+ :param string html: sanitized html; tags like html or head should not
+ be present in the html string. This method therefore takes as input
+ html code coming from a sanitized source, like fields.html.
+ """
+ def _replace_matching_regex(regex, source, replace=''):
+ dest = ''
+ idx = 0
+ for item in re.finditer(regex, source):
+ dest += source[idx:item.start()] + replace
+ idx = item.end()
+ dest += source[idx:]
+ return dest
+
+ html = ustr(html)
+
+ # 1. <br> -> __BR_TAG__ placeholder, because otherwise the tree is obfuscated
+ br_tags = re.compile(r'([<]\s*[bB][rR]\s*\/?[>])')
+ html = _replace_matching_regex(br_tags, html, '__BR_TAG__')
+
+ # 2. form a tree, handle (currently ?) pure-text by enclosing them in a pre
+ root = lxml.html.fromstring(html)
+ if not len(root) and root.text is None and root.tail is None:
+ html = '%s
' % html
+ root = lxml.html.fromstring(html)
+
+ # 2.5 remove quoted text in nodes
+ quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
+ for node in root.getiterator():
+ if not node.text:
+ continue
+ node.text = _replace_matching_regex(quote_tags, node.text)
+
+ # 3. remove blockquotes
+ quotes = [el for el in root.getiterator(tag='blockquote')]
+ for node in quotes:
+ # append the node tail to the parent text so it survives the removal
+ if node.tail:
+ parent = node.getparent()
+ parent.text = (parent.text or '') + node.tail
+ # remove the node
+ node.getparent().remove(node)
+
+ # 4. strip signatures: from the "--" marker line to the end of the text
+ signature = re.compile(r'([-]{2}[\s]?[\r\n]{1,2}[\s\S]+)')
+ for elem in root.getiterator():
+ if elem.text:
+ match = re.search(signature, elem.text)
+ if match:
+ elem.text = elem.text[:match.start()] + elem.text[match.end():]
+ if elem.tail:
+ match = re.search(signature, elem.tail)
+ if match:
+ elem.tail = elem.tail[:match.start()] + elem.tail[match.end():]
+
+ # 5. __BR_TAG__ placeholders back to <br> tags
+ html = etree.tostring(root, pretty_print=True)
+ html = html.replace('__BR_TAG__', '
')
+
+ # 6. Misc cleaning :
+ # - ClEditor seems to love using <div><br /></div> -> replace with <br />
+ br_div_tags = re.compile(r'(
\s*
\s*<\/div>)')
+ html = _replace_matching_regex(br_div_tags, html, '
')
+
+ return html
+
+
+#----------------------------------------------------------
+# HTML/Text management
+#----------------------------------------------------------
+
+def html2plaintext(html, body_id=None, encoding='utf-8'):
+ """ From an HTML text, convert the HTML to plain text.
+ If @param body_id is provided then this is the tag where the
+ body (not necessarily <body>) starts.
+ """
+ ## (c) Fry-IT, www.fry-it.com, 2007
+ ##
+ ## download here: http://www.peterbe.com/plog/html2plaintext
+
+ html = ustr(html)
+ tree = etree.fromstring(html, parser=etree.HTMLParser())
+
+ if body_id is not None:
+ source = tree.xpath('//*[@id=%s]' % (body_id,))
+ else:
+ source = tree.xpath('//body')
+ if len(source):
+ tree = source[0]
+
+ url_index = []
+ i = 0
+ for link in tree.findall('.//a'):
+ url = link.get('href')
+ if url:
+ i += 1
+ link.tag = 'span'
+ link.text = '%s [%s]' % (link.text, i)
+ url_index.append(url)
+
+ html = ustr(etree.tostring(tree, encoding=encoding))
+
+ html = html.replace('', '*').replace('', '*')
+ html = html.replace('', '*').replace('', '*')
+ html = html.replace('', '*').replace('
', '*')
+ html = html.replace('', '**').replace('
', '**')
+ html = html.replace('', '**').replace('
', '**')
+ html = html.replace('', '/').replace('', '/')
+ html = html.replace('', '\n')
+ html = html.replace('', '\n')
+ html = re.sub('
', '\n', html)
+ html = re.sub('<.*?>', ' ', html)
+ html = html.replace(' ' * 2, ' ')
+
+ # strip all lines
+ html = '\n'.join([x.strip() for x in html.splitlines()])
+ html = html.replace('\n' * 2, '\n')
+
+ for i, url in enumerate(url_index):
+ if i == 0:
+ html += '\n\n'
+ html += ustr('[%s] %s\n') % (i + 1, url)
+
+ return html
+
+def plaintext2html(text, container_tag=False):
+ """ Convert plaintext into html. Content of the text is escaped to manage
+ html entities, using cgi.escape().
+ - all \n,\r are replaced by <br />
+ - enclose content into <p>
+ - 2 or more consecutive <br /> are considered as paragraph breaks
+
+ :param string container_tag: container of the html; by default the
+ content is embedded into a
+ """
+ text = cgi.escape(ustr(text))
+
+ # 1. replace \n and \r
+ text = text.replace('\n', '
')
+ text = text.replace('\r', '
')
+
+ # 2-3: form paragraphs
+ idx = 0
+ final = '
'
+ br_tags = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
+ for item in re.finditer(br_tags, text):
+ final += text[idx:item.start()] + '
'
+ idx = item.end()
+ final += text[idx:] + '
'
+
+ # 4. container
+ if container_tag:
+ final = '<%s>%s%s>' % (container_tag, final, container_tag)
+ return ustr(final)
+
+def append_content_to_html(html, content, plaintext=True, preserve=False, container_tag=False):
+ """ Append extra content at the end of an HTML snippet, trying
+ to locate the end of the HTML document (,