#!/usr/bin/env python
# EASY-INSTALL-ENTRY-SCRIPT: 'Babel==0.9.6','console_scripts','pybabel'
"""``pybabel`` launcher bundled with custom Babel message extractors.

Run as a script, this file hands control to the ``pybabel`` console script
of Babel 0.9.6.  It also defines extractors that tag every message they
yield with the ``TRANSLATION_FLAG_COMMENT`` translator comment.
"""
__requires__ = 'Babel==0.9.6'
import sys
from pkg_resources import load_entry_point
import re
import json
from lxml import etree as elt
from babel.messages import extract

# Script mode: behave exactly like the ``pybabel`` console script.  The guard
# sits above the definitions below, so they are never evaluated in that case.
if __name__ == '__main__':
    sys.exit(
        load_entry_point('Babel==0.9.6', 'console_scripts', 'pybabel')()
    )

# Matches ``_t("...")`` / ``_t('...')`` calls; group 1 is the quoted literal.
XMLJS_EXPR = re.compile(r"""(?:\_t *\( *((?:"(?:[^"\\]|\\.)*")|(?:'(?:[^'\\]|\\.)*')) *\))""")

# Translator comment attached to every message yielded by these extractors.
TRANSLATION_FLAG_COMMENT = "openerp-web"

# List of etree._Element subclasses that we choose to ignore when parsing XML.
# We include the *Base ones just in case, currently they seem to be subclasses
# of the _* ones.
SKIPPED_ELEMENT_TYPES = (elt._Comment, elt._ProcessingInstruction,
                         elt.CommentBase, elt.PIBase)


def extract_xmljs(fileobj, keywords, comment_tags, options):
    """Extract messages from Javascript code embedded into XML documents.

    This complements the ``extract_javascript`` extractor which works only on
    pure .js files, and the ``extract_qweb`` extractor, which only extracts
    XML text.

    This extractor is deliberately disabled: iterating over its result always
    raises, and the error message explains why.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    :raises AssertionError: always, on first iteration
    """
    # Raised explicitly rather than via ``assert False``: assert statements
    # are stripped under ``python -O``, which would let the broken code below
    # run.
    raise AssertionError(""" the XMLJS extractor does not work and was removed:

    * Babel apparently does not accept two extractors for the same set of files
      so it would not run the xmljs extractor at all, extraction of JS stuff
      needs to be done from the XML extractor
    * The regex above fails up if there are back-slashed quotes within the
      translatable string (the string marked with _t), it just won't match the
      string
    * While extraction succeeds on XML entities (e.g. &quot;), translation
      matching will fail if those entities are kept in the PO msgid as the
      XML parser will get an un-escaped string, without those entities (so a
      text extractor will extract ``Found match &quot;%s&quot;``, but the msgid
      of the PO file must be ``Found match "%s"`` or the translation will fail
    * single-quoted strings are not valid JSON string, so single-quoted strings
      matched by the regex (likely since XML attributes are double-quoted,
      single quotes within them don't have to be escaped) will blow up when
      json-parsed for their content

    I think that's about it.

    If this extractor is reimplemented, it should be integrated into
    extract_qweb, either in the current pass (probably not a good idea) or as
    a separate pass using iterparse, matching either elements with t-js or
    some other kinds of t-* directives (@t-esc, @t-raw, @t-att, others?),
    shove the attribute content into a StringIO and pass *that* to Babel's
    own extract_javascript; then add a line offset in order to yield the
    correct line number.
    """)

    # Unreachable legacy implementation, kept for reference.  Its ``yield``
    # also keeps this function a generator, so the error above still surfaces
    # on first iteration rather than at call time.
    content = fileobj.read()
    found = XMLJS_EXPR.finditer(content)
    index = 0
    line_nbr = 0
    for f in found:
        msg = f.group(1)
        msg = json.loads(msg)
        # Count the newlines between the previous match and this one.
        while index < f.start():
            if content[index] == "\n":
                line_nbr += 1
            index += 1
        yield (line_nbr, None, msg, [TRANSLATION_FLAG_COMMENT])


def extract_qweb(fileobj, keywords, comment_tags, options):
    """Extract messages from qweb template files.

    Collects the text, the tail and the ``title``/``alt``/``label``/
    ``placeholder`` attributes of the descendants of the document root.
    Comments and processing instructions are ignored, and the content of an
    element (including its whole sub-tree) is skipped when the element:

    * has a ``t-js`` attribute, or
    * has ``t-jquery`` without ``t-operation``, or
    * has ``t-translation`` set to ``off``.

    The tail of such an element is still extracted.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: a list of ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``list``
    """
    result = []
    translatable_attributes = ('title', 'alt', 'label', 'placeholder')

    def handle_text(text, lineno):
        # Record ``text`` unless it is empty or a single character.
        text = (text or "").strip()
        if len(text) > 1:  # Avoid mono-char tokens like ':' ',' etc.
            result.append((lineno, None, text, [TRANSLATION_FLAG_COMMENT]))

    def is_translatable(el):
        # True when the element's own content and sub-tree must be extracted.
        attrib = el.attrib
        jquery_without_operation = (
            "t-jquery" in attrib and "t-operation" not in attrib)
        translation_off = el.get("t-translation", "").strip() == "off"
        return ("t-js" not in attrib
                and not jquery_without_operation
                and not translation_off)

    # not using elementTree.iterparse because we need to skip sub-trees in case
    # the ancestor element had a reason to be skipped
    def iter_elements(current_element):
        for el in current_element:
            if isinstance(el, SKIPPED_ELEMENT_TYPES):
                continue
            if is_translatable(el):
                handle_text(el.text, el.sourceline)
                for att in translatable_attributes:
                    if att in el.attrib:
                        handle_text(el.attrib[att], el.sourceline)
                iter_elements(el)
            # The tail is handled even when the element itself was skipped.
            handle_text(el.tail, el.sourceline)

    tree = elt.parse(fileobj)
    iter_elements(tree.getroot())

    return result


def extract_javascript(fileobj, keywords, comment_tags, options):
    """Extract messages from Javascript source files.

    This extractor delegates to babel's built-in javascript extractor, but
    adds a special comment used as a flag to identify web translations.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """
    for (message_lineno, funcname, messages, comments) in \
            extract.extract_javascript(fileobj, keywords, comment_tags, options):
        # Flag the message in place on the comment list babel handed back.
        comments.append(TRANSLATION_FLAG_COMMENT)
        yield (message_lineno, funcname, messages, comments)