ir.translation, import: push the algo. to SQL, improve performance
During a language import (loading translation files), we want to push a big batch of translation records into the database. Each of them needs update-or-insert logic (against the existing ones) and possibly the resolution of an ir.model.data reference. Doing this loop in Python was slow: every record invoked 2x read()s, +1 for ir.model.data and 1 insert or update, and it triggered the cache (filled and cleared at each iteration). Instead, follow the old-school db recipe for mass record insertion: - create a temporary table w/o indexes or constraints - quickly populate the temp table with all records of the batch (through a dedicated "cursor" object) - process the table, doing the lookups in collective SQL queries (yes, SQL is all about loops of data processing, efficiently) - insert all records from the temp table into ir.translation - let all constraints of ir.translation be checked (implicitly) at the end of that single query. This improves the performance of translation imports by at least ~3x. bzr revid: xrg@linux.gr-20110608162059-rfy1vvwp8w66ry0i
This commit is contained in:
parent
d4f82fbd8c
commit
f1558730f6
|
@ -21,6 +21,7 @@
|
|||
|
||||
from osv import fields, osv
|
||||
import tools
|
||||
import logging
|
||||
|
||||
TRANSLATION_TYPE = [
|
||||
('field', 'Field'),
|
||||
|
@ -39,6 +40,115 @@ TRANSLATION_TYPE = [
|
|||
('sql_constraint', 'SQL Constraint')
|
||||
]
|
||||
|
||||
class ir_translation_import_cursor(object):
    """Temporary cursor for optimizing mass insert into ir.translation

    Open it (attached to a sql cursor), feed it with translation data
    through push() and finish() it in order to insert multiple translations
    in a batch.

    The rows are staged in a temporary SQL table; finish() then resolves
    the ir.model.data references, updates/inserts the rows into the parent
    table with a handful of set-based queries and drops the staging table.
    """
    _table_name = 'tmp_ir_translation_import'

    def __init__(self, cr, uid, parent, context=None):
        """ Initializer

        Store some values, and also create a temporary SQL table to accept
        the data.

        @param cr the sql cursor all queries are executed on
        @param uid the user id, only stored on the instance
        @param parent an instance of ir.translation ORM model; its
            `_table` attribute names the table to inherit from and to
            finally insert into
        @param context optional dictionary; the 'overwrite' key decides
            whether finish() updates already existing translations.
            None is treated as an empty context.
        """
        # Direct callers may pass None; without this guard the
        # context.get() below would raise AttributeError.
        if context is None:
            context = {}

        self._cr = cr
        self._uid = uid
        self._context = context
        self._overwrite = context.get('overwrite', False)
        # Hard-wired switch for the extra diagnostic queries in finish()
        self._debug = False
        self._parent_table = parent._table

        # Note that Postgres will NOT inherit the constraints or indexes
        # of ir_translation, so this copy will be much faster.

        cr.execute('''CREATE TEMP TABLE %s(
            imd_model VARCHAR(64),
            imd_module VARCHAR(64),
            imd_name VARCHAR(128)
            ) INHERITS (%s) ''' % (self._table_name, self._parent_table))

    def push(self, ddict):
        """Feed a translation, as a dictionary, into the cursor

        @param ddict dictionary with the mandatory keys 'name', 'lang',
            'src', 'type' and 'value' (a missing one raises KeyError) and
            the optional keys 'res_id', 'imd_model', 'imd_module' and
            'imd_name' (stored as NULL when absent)
        """

        self._cr.execute("INSERT INTO " + self._table_name \
                + """(name, lang, res_id, src, type,
                        imd_model, imd_module, imd_name, value)
                VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s)""",
                (ddict['name'], ddict['lang'], ddict.get('res_id'), ddict['src'], ddict['type'],
                    ddict.get('imd_model'), ddict.get('imd_module'), ddict.get('imd_name'),
                    ddict['value']))

    def finish(self):
        """ Transfer the data from the temp table to ir.translation

        Runs the resolve / update / insert steps below and finally drops
        the temporary table, so the cursor cannot be used afterwards.

        @return True
        """
        logger = logging.getLogger('orm')

        cr = self._cr
        if self._debug:
            cr.execute("SELECT count(*) FROM %s" % self._table_name)
            c = cr.fetchone()[0]
            logger.debug("ir.translation.cursor: We have %d entries to process", c)

        # Step 1: resolve ir.model.data references to res_ids
        cr.execute("""UPDATE %s AS ti
            SET res_id = imd.res_id
            FROM ir_model_data AS imd
            WHERE ti.res_id IS NULL
                AND ti.imd_module IS NOT NULL AND ti.imd_name IS NOT NULL

                AND ti.imd_module = imd.module AND ti.imd_name = imd.name
                AND ti.imd_model = imd.model; """ % self._table_name)

        if self._debug:
            cr.execute("SELECT imd_module, imd_model, imd_name FROM %s " \
                "WHERE res_id IS NULL AND imd_module IS NOT NULL" % self._table_name)
            for row in cr.fetchall():
                logger.debug("ir.translation.cursor: missing res_id for %s. %s/%s ", *row)

        # Records w/o res_id must _not_ be inserted into our db, because they are
        # referencing non-existent data.
        cr.execute("DELETE FROM %s WHERE res_id IS NULL AND imd_module IS NOT NULL" % \
            self._table_name)

        # Matching rule between an existing row (irt) and a staged row (ti);
        # res_id only takes part in the match for 'model' translations.
        find_expr = "irt.lang = ti.lang AND irt.type = ti.type " \
                    " AND irt.name = ti.name AND irt.src = ti.src " \
                    " AND (ti.type != 'model' OR ti.res_id = irt.res_id) "

        # Step 2: update existing (matching) translations
        if self._overwrite:
            # ONLY keeps the update away from the inheriting temp table;
            # empty staged values never overwrite an existing translation.
            cr.execute("""UPDATE ONLY %s AS irt
                SET value = ti.value
                FROM %s AS ti
                WHERE %s AND ti.value IS NOT NULL AND ti.value != ''
                """ % (self._parent_table, self._table_name, find_expr))

        # Step 3: insert new translations

        cr.execute("""INSERT INTO %s(name, lang, res_id, src, type, value)
            SELECT name, lang, res_id, src, type, value
              FROM %s AS ti
              WHERE NOT EXISTS(SELECT 1 FROM ONLY %s AS irt WHERE %s);
              """ % (self._parent_table, self._table_name, self._parent_table, find_expr))

        if self._debug:
            cr.execute('SELECT COUNT(*) FROM ONLY %s' % (self._parent_table))
            c1 = cr.fetchone()[0]
            cr.execute('SELECT COUNT(*) FROM ONLY %s AS irt, %s AS ti WHERE %s' % \
                (self._parent_table, self._table_name, find_expr))
            c = cr.fetchone()[0]
            logger.debug("ir.translation.cursor: %d entries now in ir.translation, %d common entries with tmp", c1, c)

        # Step 4: cleanup
        cr.execute("DROP TABLE %s" % self._table_name)
        return True
|
||||
|
||||
class ir_translation(osv.osv):
|
||||
_name = "ir.translation"
|
||||
_log_access = False
|
||||
|
@ -214,6 +324,13 @@ class ir_translation(osv.osv):
|
|||
result = super(ir_translation, self).unlink(cursor, user, ids, context=context)
|
||||
return result
|
||||
|
||||
def _get_import_cursor(self, cr, uid, context=None):
    """ Build the helper object used to mass-insert translations

    A missing context is replaced by an empty dictionary before the
    ir_translation_import_cursor, bound to this model, is created.
    """
    ctx = {} if context is None else context
    return ir_translation_import_cursor(cr, uid, self, context=ctx)
|
||||
|
||||
# Instantiate the model once when the module is loaded.
# NOTE(review): presumably this is how the osv framework registers the model - confirm.
ir_translation()
|
||||
|
||||
# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4:
|
||||
|
|
|
@ -864,7 +864,6 @@ def trans_load_data(cr, fileobj, fileformat, lang, lang_name=None, verbose=True,
|
|||
pool = pooler.get_pool(db_name)
|
||||
lang_obj = pool.get('res.lang')
|
||||
trans_obj = pool.get('ir.translation')
|
||||
model_data_obj = pool.get('ir.model.data')
|
||||
iso_lang = misc.get_iso_codes(lang)
|
||||
try:
|
||||
uid = 1
|
||||
|
@ -892,6 +891,8 @@ def trans_load_data(cr, fileobj, fileformat, lang, lang_name=None, verbose=True,
|
|||
|
||||
# read the rest of the file
|
||||
line = 1
|
||||
irt_cursor = trans_obj._get_import_cursor(cr, uid, context=context)
|
||||
|
||||
for row in reader:
|
||||
line += 1
|
||||
# skip empty rows and rows where the translation field (=last field) is empty
|
||||
|
@ -902,39 +903,41 @@ def trans_load_data(cr, fileobj, fileformat, lang, lang_name=None, verbose=True,
|
|||
# {'lang': ..., 'type': ..., 'name': ..., 'res_id': ...,
|
||||
# 'src': ..., 'value': ...}
|
||||
dic = {'lang': lang}
|
||||
dic_module = False
|
||||
for i in range(len(f)):
|
||||
if f[i] in ('module',):
|
||||
continue
|
||||
dic[f[i]] = row[i]
|
||||
|
||||
try:
|
||||
dic['res_id'] = dic['res_id'] and int(dic['res_id']) or 0
|
||||
except:
|
||||
model_data_ids = model_data_obj.search(cr, uid, [
|
||||
('model', '=', dic['name'].split(',')[0]),
|
||||
('module', '=', dic['res_id'].split('.', 1)[0]),
|
||||
('name', '=', dic['res_id'].split('.', 1)[1]),
|
||||
])
|
||||
if model_data_ids:
|
||||
dic['res_id'] = model_data_obj.browse(cr, uid,
|
||||
model_data_ids[0]).res_id
|
||||
else:
|
||||
dic['res_id'] = False
|
||||
# This would skip terms that fail to specify a res_id
|
||||
if not dic.get('res_id', False):
|
||||
continue
|
||||
|
||||
args = [
|
||||
('lang', '=', lang),
|
||||
('type', '=', dic['type']),
|
||||
('name', '=', dic['name']),
|
||||
('src', '=', dic['src']),
|
||||
]
|
||||
if dic['type'] == 'model':
|
||||
args.append(('res_id', '=', dic['res_id']))
|
||||
ids = trans_obj.search(cr, uid, args)
|
||||
if ids:
|
||||
if context.get('overwrite') and dic['value']:
|
||||
trans_obj.write(cr, uid, ids, {'value': dic['value']})
|
||||
res_id = dic.pop('res_id')
|
||||
if res_id and isinstance(res_id, (int, long)) \
|
||||
or (isinstance(res_id, basestring) and res_id.isdigit()):
|
||||
dic['res_id'] = int(res_id)
|
||||
else:
|
||||
trans_obj.create(cr, uid, dic)
|
||||
try:
|
||||
tmodel = dic['name'].split(',')[0]
|
||||
if '.' in res_id:
|
||||
tmodule, tname = res_id.split('.', 1)
|
||||
else:
|
||||
tmodule = dic_module
|
||||
tname = res_id
|
||||
dic['imd_model'] = tmodel
|
||||
dic['imd_module'] = tmodule
|
||||
dic['imd_name'] = tname
|
||||
|
||||
dic['res_id'] = None
|
||||
except Exception:
|
||||
logger.warning("Could not decode resource for %s, please fix the po file.",
|
||||
dic['res_id'], exc_info=True)
|
||||
dic['res_id'] = None
|
||||
|
||||
irt_cursor.push(dic)
|
||||
|
||||
irt_cursor.finish()
|
||||
if verbose:
|
||||
logger.info("translation file loaded succesfully")
|
||||
except IOError:
|
||||
|
|
Loading…
Reference in New Issue