ir.translation, import: push the algo. to SQL, improve performance

At a language import (translation files), we want to push a big batch of
translation records into the database. These need update-or-insert
logic (against existing ones) or even resolution of ir.model.data.
Doing this loop in Python was slow: each iteration invoked 2x read()s
(+1 for ir.model.data) and 1 insert or update, and triggered the cache
(fill and clean at each iteration).

Instead, follow the old-school db recipe for mass records insertion:
 - create a temporary table w/o indexes or constraints
 - quickly populate the temp with all records of the batch
   (through a dedicated "cursor" object)
 - process the table, doing lookups in collective SQL queries (yes, SQL
   is all about loops of data processing, efficiently)
 - insert all records from temp into ir.translation
 - call (implicitly) all constraints of ir.translation at the end of that
   single query.

This improves performance of translation imports by ~3x at least.

bzr revid: xrg@linux.gr-20110608162059-rfy1vvwp8w66ry0i
This commit is contained in:
P. Christeas 2011-06-08 19:20:59 +03:00 committed by P. Christeas
parent d4f82fbd8c
commit f1558730f6
2 changed files with 147 additions and 27 deletions

View File

@ -21,6 +21,7 @@
from osv import fields, osv
import tools
import logging
TRANSLATION_TYPE = [
('field', 'Field'),
@ -39,6 +40,115 @@ TRANSLATION_TYPE = [
('sql_constraint', 'SQL Constraint')
]
class ir_translation_import_cursor(object):
    """Temporary cursor for optimizing mass insert into ir.translation

    Open it (attached to a sql cursor), feed it with translation data
    through push() and finish() it in order to insert multiple translations
    in a batch.
    """
    _table_name = 'tmp_ir_translation_import'

    def __init__(self, cr, uid, parent, context=None):
        """ Initializer

        Store some values, and also create a temporary SQL table to accept
        the data.

        @param cr database cursor, every statement of this object runs on it
        @param uid user id (only stored)
        @param parent an instance of ir.translation ORM model
        @param context optional dictionary; its 'overwrite' key decides
                whether finish() updates already existing translations
        """
        # A missing context must behave like an empty one instead of
        # crashing on context.get()
        if context is None:
            context = {}

        self._cr = cr
        self._uid = uid
        self._context = context
        self._overwrite = context.get('overwrite', False)
        self._debug = False
        self._parent_table = parent._table

        # Note that a Postgres child table does NOT inherit the indexes,
        # unique, primary key or foreign key constraints of ir_translation
        # (only CHECK and NOT NULL ones), so filling this copy is much faster.
        cr.execute((
            "CREATE TEMP TABLE %s(\n"
            "imd_model VARCHAR(64),\n"
            "imd_module VARCHAR(64),\n"
            "imd_name VARCHAR(128)\n"
            ") INHERITS (%s) ") % (self._table_name, self._parent_table))

    def push(self, ddict):
        """Feed a translation, as a dictionary, into the cursor

        @param ddict must hold 'name', 'lang', 'src', 'type' and 'value'
                (KeyError otherwise); 'res_id', 'imd_model', 'imd_module'
                and 'imd_name' are optional and stored as NULL when absent
        """
        query = ("INSERT INTO " + self._table_name +
                 "(name, lang, res_id, src, type,\n"
                 "imd_model, imd_module, imd_name, value)\n"
                 "VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s)")
        values = (
            ddict['name'], ddict['lang'], ddict.get('res_id'),
            ddict['src'], ddict['type'],
            ddict.get('imd_model'), ddict.get('imd_module'),
            ddict.get('imd_name'),
            ddict['value'],
        )
        # The values go through the driver as parameters, only the table
        # name (a class constant) is concatenated into the statement.
        self._cr.execute(query, values)

    def finish(self):
        """ Transfer the data from the temp table to ir.translation

        Resolves the ir.model.data references, optionally overwrites the
        matching translations, inserts the new ones and drops the temporary
        table, which makes this object unusable afterwards.

        @return True
        """
        logger = logging.getLogger('orm')
        cr = self._cr
        tmp_table = self._table_name
        parent_table = self._parent_table

        if self._debug:
            cr.execute("SELECT count(*) FROM %s" % tmp_table)
            count = cr.fetchone()[0]
            logger.debug("ir.translation.cursor: We have %d entries to process", count)

        # Step 1: resolve ir.model.data references to res_ids
        cr.execute((
            "UPDATE %s AS ti\n"
            "SET res_id = imd.res_id\n"
            "FROM ir_model_data AS imd\n"
            "WHERE ti.res_id IS NULL\n"
            "AND ti.imd_module IS NOT NULL AND ti.imd_name IS NOT NULL\n"
            "AND ti.imd_module = imd.module AND ti.imd_name = imd.name\n"
            "AND ti.imd_model = imd.model; ") % tmp_table)

        if self._debug:
            cr.execute(("SELECT imd_module, imd_model, imd_name FROM %s "
                        "WHERE res_id IS NULL AND imd_module IS NOT NULL") % tmp_table)
            for row in cr.fetchall():
                logger.debug("ir.translation.cursor: missing res_id for %s. %s/%s ", *row)

        # Records w/o res_id must _not_ be inserted into our db, because they
        # are referencing non-existent data.
        cr.execute("DELETE FROM %s WHERE res_id IS NULL AND imd_module IS NOT NULL"
                   % tmp_table)

        # Condition matching a temp row (ti) with an existing translation
        # (irt); the res_id is only compared for 'model' translations.
        find_expr = ("irt.lang = ti.lang AND irt.type = ti.type "
                     " AND irt.name = ti.name AND irt.src = ti.src "
                     " AND (ti.type != 'model' OR ti.res_id = irt.res_id) ")

        # Step 2: update existing (matching) translations
        if self._overwrite:
            # ONLY keeps the temp (child) table itself out of the update
            cr.execute((
                "UPDATE ONLY %s AS irt\n"
                "SET value = ti.value\n"
                "FROM %s AS ti\n"
                "WHERE %s AND ti.value IS NOT NULL AND ti.value != ''\n"
                ) % (parent_table, tmp_table, find_expr))

        # Step 3: insert new translations
        cr.execute((
            "INSERT INTO %s(name, lang, res_id, src, type, value)\n"
            "SELECT name, lang, res_id, src, type, value\n"
            "FROM %s AS ti\n"
            "WHERE NOT EXISTS(SELECT 1 FROM ONLY %s AS irt WHERE %s);\n"
            ) % (parent_table, tmp_table, parent_table, find_expr))

        if self._debug:
            cr.execute('SELECT COUNT(*) FROM ONLY %s' % (parent_table))
            total = cr.fetchone()[0]
            cr.execute('SELECT COUNT(*) FROM ONLY %s AS irt, %s AS ti WHERE %s'
                       % (parent_table, tmp_table, find_expr))
            common = cr.fetchone()[0]
            logger.debug("ir.translation.cursor: %d entries now in ir.translation, %d common entries with tmp", total, common)

        # Step 4: cleanup
        cr.execute("DROP TABLE %s" % tmp_table)
        return True
class ir_translation(osv.osv):
_name = "ir.translation"
_log_access = False
@ -214,6 +324,13 @@ class ir_translation(osv.osv):
result = super(ir_translation, self).unlink(cursor, user, ids, context=context)
return result
def _get_import_cursor(self, cr, uid, context=None):
    """ Build the batch-import helper for this model

    Returns an ir_translation_import_cursor bound to `cr`, `uid` and this
    model; a missing context is replaced by an empty dictionary.
    """
    ctx = {} if context is None else context
    return ir_translation_import_cursor(cr, uid, self, context=ctx)
ir_translation()
# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4:

View File

@ -864,7 +864,6 @@ def trans_load_data(cr, fileobj, fileformat, lang, lang_name=None, verbose=True,
pool = pooler.get_pool(db_name)
lang_obj = pool.get('res.lang')
trans_obj = pool.get('ir.translation')
model_data_obj = pool.get('ir.model.data')
iso_lang = misc.get_iso_codes(lang)
try:
uid = 1
@ -892,6 +891,8 @@ def trans_load_data(cr, fileobj, fileformat, lang, lang_name=None, verbose=True,
# read the rest of the file
line = 1
irt_cursor = trans_obj._get_import_cursor(cr, uid, context=context)
for row in reader:
line += 1
# skip empty rows and rows where the translation field (=last field) is empty
@ -902,39 +903,41 @@ def trans_load_data(cr, fileobj, fileformat, lang, lang_name=None, verbose=True,
# {'lang': ..., 'type': ..., 'name': ..., 'res_id': ...,
# 'src': ..., 'value': ...}
dic = {'lang': lang}
dic_module = False
for i in range(len(f)):
if f[i] in ('module',):
continue
dic[f[i]] = row[i]
try:
dic['res_id'] = dic['res_id'] and int(dic['res_id']) or 0
except:
model_data_ids = model_data_obj.search(cr, uid, [
('model', '=', dic['name'].split(',')[0]),
('module', '=', dic['res_id'].split('.', 1)[0]),
('name', '=', dic['res_id'].split('.', 1)[1]),
])
if model_data_ids:
dic['res_id'] = model_data_obj.browse(cr, uid,
model_data_ids[0]).res_id
else:
dic['res_id'] = False
# This would skip terms that fail to specify a res_id
if not dic.get('res_id', False):
continue
args = [
('lang', '=', lang),
('type', '=', dic['type']),
('name', '=', dic['name']),
('src', '=', dic['src']),
]
if dic['type'] == 'model':
args.append(('res_id', '=', dic['res_id']))
ids = trans_obj.search(cr, uid, args)
if ids:
if context.get('overwrite') and dic['value']:
trans_obj.write(cr, uid, ids, {'value': dic['value']})
res_id = dic.pop('res_id')
if res_id and isinstance(res_id, (int, long)) \
or (isinstance(res_id, basestring) and res_id.isdigit()):
dic['res_id'] = int(res_id)
else:
trans_obj.create(cr, uid, dic)
try:
tmodel = dic['name'].split(',')[0]
if '.' in res_id:
tmodule, tname = res_id.split('.', 1)
else:
tmodule = dic_module
tname = res_id
dic['imd_model'] = tmodel
dic['imd_module'] = tmodule
dic['imd_name'] = tname
dic['res_id'] = None
except Exception:
logger.warning("Could not decode resource for %s, please fix the po file.",
dic['res_id'], exc_info=True)
dic['res_id'] = None
irt_cursor.push(dic)
irt_cursor.finish()
if verbose:
logger.info("translation file loaded succesfully")
except IOError: