odoo/addons/anonymization/anonymization.py

639 lines
28 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- encoding: utf-8 -*-
##############################################################################
#
# OpenERP, Open Source Management Solution
# Copyright (C) 2004-2009 Tiny SPRL (<http://tiny.be>). All Rights Reserved
# $Id$
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
from lxml import etree
import os
import base64
try:
import cPickle as pickle
except ImportError:
import pickle
import random
import datetime
from openerp.osv import fields, osv
from openerp.tools.translate import _
from openerp.tools.safe_eval import safe_eval as eval
from itertools import groupby
from operator import itemgetter
FIELD_STATES = [('clear', 'Clear'), ('anonymized', 'Anonymized'), ('not_existing', 'Not Existing'), ('new', 'New')]
ANONYMIZATION_STATES = FIELD_STATES + [('unstable', 'Unstable')]
WIZARD_ANONYMIZATION_STATES = [('clear', 'Clear'), ('anonymized', 'Anonymized'), ('unstable', 'Unstable')]
ANONYMIZATION_HISTORY_STATE = [('started', 'Started'), ('done', 'Done'), ('in_exception', 'Exception occured')]
ANONYMIZATION_DIRECTION = [('clear -> anonymized', 'clear -> anonymized'), ('anonymized -> clear', 'anonymized -> clear')]
def group(lst, cols):
if isinstance(cols, basestring):
cols = [cols]
return dict((k, [v for v in itr]) for k, itr in groupby(sorted(lst, key=itemgetter(*cols)), itemgetter(*cols)))
class ir_model_fields_anonymization(osv.osv):
_name = 'ir.model.fields.anonymization'
_rec_name = 'field_id'
_columns = {
'model_name': fields.char('Object Name', required=True),
'model_id': fields.many2one('ir.model', 'Object', ondelete='set null'),
'field_name': fields.char('Field Name', required=True),
'field_id': fields.many2one('ir.model.fields', 'Field', ondelete='set null'),
'state': fields.selection(selection=FIELD_STATES, String='Status', required=True, readonly=True),
}
_sql_constraints = [
('model_id_field_id_uniq', 'unique (model_name, field_name)', _("You cannot have two fields with the same name on the same object!")),
]
def _get_global_state(self, cr, uid, context=None):
ids = self.search(cr, uid, [('state', '<>', 'not_existing')], context=context)
fields = self.browse(cr, uid, ids, context=context)
if not len(fields) or len(fields) == len([f for f in fields if f.state == 'clear']):
state = 'clear' # all fields are clear
elif len(fields) == len([f for f in fields if f.state == 'anonymized']):
state = 'anonymized' # all fields are anonymized
else:
state = 'unstable' # fields are mixed: this should be fixed
return state
def _check_write(self, cr, uid, context=None):
"""check that the field is created from the menu and not from an database update
otherwise the database update can crash:"""
if context is None:
context = {}
if context.get('manual'):
global_state = self._get_global_state(cr, uid, context=context)
if global_state == 'anonymized':
raise osv.except_osv('Error!', "The database is currently anonymized, you cannot create, modify or delete fields.")
elif global_state == 'unstable':
msg = _("The database anonymization is currently in an unstable state. Some fields are anonymized," + \
" while some fields are not anonymized. You should try to solve this problem before trying to create, write or delete fields.")
raise osv.except_osv('Error!', msg)
return True
def _get_model_and_field_ids(self, cr, uid, vals, context=None):
model_and_field_ids = (False, False)
if 'field_name' in vals and vals['field_name'] and 'model_name' in vals and vals['model_name']:
ir_model_fields_obj = self.pool.get('ir.model.fields')
ir_model_obj = self.pool.get('ir.model')
model_ids = ir_model_obj.search(cr, uid, [('model', '=', vals['model_name'])], context=context)
if model_ids:
field_ids = ir_model_fields_obj.search(cr, uid, [('name', '=', vals['field_name']), ('model_id', '=', model_ids[0])], context=context)
if field_ids:
field_id = field_ids[0]
model_and_field_ids = (model_ids[0], field_id)
return model_and_field_ids
def create(self, cr, uid, vals, context=None):
# check field state: all should be clear before we can add a new field to anonymize:
self._check_write(cr, uid, context=context)
global_state = self._get_global_state(cr, uid, context=context)
if 'field_name' in vals and vals['field_name'] and 'model_name' in vals and vals['model_name']:
vals['model_id'], vals['field_id'] = self._get_model_and_field_ids(cr, uid, vals, context=context)
# check not existing fields:
if not vals.get('field_id'):
vals['state'] = 'not_existing'
else:
vals['state'] = global_state
res = super(ir_model_fields_anonymization, self).create(cr, uid, vals, context=context)
return res
def write(self, cr, uid, ids, vals, context=None):
# check field state: all should be clear before we can modify a field:
if not (len(vals.keys()) == 1 and vals.get('state') == 'clear'):
self._check_write(cr, uid, context=context)
if 'field_name' in vals and vals['field_name'] and 'model_name' in vals and vals['model_name']:
vals['model_id'], vals['field_id'] = self._get_model_and_field_ids(cr, uid, vals, context=context)
# check not existing fields:
if 'field_id' in vals:
if not vals.get('field_id'):
vals['state'] = 'not_existing'
else:
global_state = self._get_global_state(cr, uid, context)
if global_state != 'unstable':
vals['state'] = global_state
res = super(ir_model_fields_anonymization, self).write(cr, uid, ids, vals, context=context)
return res
def unlink(self, cr, uid, ids, context=None):
# check field state: all should be clear before we can unlink a field:
self._check_write(cr, uid, context=context)
res = super(ir_model_fields_anonymization, self).unlink(cr, uid, ids, context=context)
return res
def onchange_model_id(self, cr, uid, ids, model_id, context=None):
res = {'value': {
'field_name': False,
'field_id': False,
'model_name': False,
}}
if model_id:
ir_model_obj = self.pool.get('ir.model')
model_ids = ir_model_obj.search(cr, uid, [('id', '=', model_id)])
model_id = model_ids and model_ids[0] or None
model_name = model_id and ir_model_obj.browse(cr, uid, model_id).model or False
res['value']['model_name'] = model_name
return res
def onchange_model_name(self, cr, uid, ids, model_name, context=None):
res = {'value': {
'field_name': False,
'field_id': False,
'model_id': False,
}}
if model_name:
ir_model_obj = self.pool.get('ir.model')
model_ids = ir_model_obj.search(cr, uid, [('model', '=', model_name)])
model_id = model_ids and model_ids[0] or False
res['value']['model_id'] = model_id
return res
def onchange_field_name(self, cr, uid, ids, field_name, model_name):
res = {'value': {
'field_id': False,
}}
if field_name and model_name:
ir_model_fields_obj = self.pool.get('ir.model.fields')
field_ids = ir_model_fields_obj.search(cr, uid, [('name', '=', field_name), ('model', '=', model_name)])
field_id = field_ids and field_ids[0] or False
res['value']['field_id'] = field_id
return res
def onchange_field_id(self, cr, uid, ids, field_id, model_name):
res = {'value': {
'field_name': False,
}}
if field_id:
ir_model_fields_obj = self.pool.get('ir.model.fields')
field = ir_model_fields_obj.browse(cr, uid, field_id)
res['value']['field_name'] = field.name
return res
_defaults = {
'state': lambda *a: 'clear',
}
class ir_model_fields_anonymization_history(osv.osv):
_name = 'ir.model.fields.anonymization.history'
_order = "date desc"
_columns = {
'date': fields.datetime('Date', required=True, readonly=True),
'field_ids': fields.many2many('ir.model.fields.anonymization', 'anonymized_field_to_history_rel', 'field_id', 'history_id', 'Fields', readonly=True),
'state': fields.selection(selection=ANONYMIZATION_HISTORY_STATE, string='Status', required=True, readonly=True),
'direction': fields.selection(selection=ANONYMIZATION_DIRECTION, string='Direction', size=20, required=True, readonly=True),
'msg': fields.text('Message', readonly=True),
'filepath': fields.char(string='File path', readonly=True),
}
class ir_model_fields_anonymize_wizard(osv.osv_memory):
_name = 'ir.model.fields.anonymize.wizard'
def _get_state(self, cr, uid, ids, name, arg, context=None):
res = {}
state = self._get_state_value(cr, uid, context=None)
for id in ids:
res[id] = state
return res
def _get_summary(self, cr, uid, ids, name, arg, context=None):
res = {}
summary = self._get_summary_value(cr, uid, context)
for id in ids:
res[id] = summary
return res
_columns = {
'name': fields.char(string='File Name'),
'summary': fields.function(_get_summary, type='text', string='Summary'),
'file_export': fields.binary(string='Export'),
'file_import': fields.binary(string='Import', help="This is the file created by the anonymization process. It should have the '.pickle' extention."),
'state': fields.function(_get_state, string='Status', type='selection', selection=WIZARD_ANONYMIZATION_STATES, readonly=False),
'msg': fields.text(string='Message'),
}
def _get_state_value(self, cr, uid, context=None):
state = self.pool.get('ir.model.fields.anonymization')._get_global_state(cr, uid, context=context)
return state
def _get_summary_value(self, cr, uid, context=None):
summary = u''
anon_field_obj = self.pool.get('ir.model.fields.anonymization')
ir_model_fields_obj = self.pool.get('ir.model.fields')
anon_field_ids = anon_field_obj.search(cr, uid, [('state', '<>', 'not_existing')], context=context)
anon_fields = anon_field_obj.browse(cr, uid, anon_field_ids, context=context)
field_ids = [anon_field.field_id.id for anon_field in anon_fields if anon_field.field_id]
fields = ir_model_fields_obj.browse(cr, uid, field_ids, context=context)
fields_by_id = dict([(f.id, f) for f in fields])
for anon_field in anon_fields:
field = fields_by_id.get(anon_field.field_id.id)
values = {
'model_name': field.model_id.name,
'model_code': field.model_id.model,
'field_code': field.name,
'field_name': field.field_description,
'state': anon_field.state,
}
summary += u" * %(model_name)s (%(model_code)s) -> %(field_name)s (%(field_code)s): state: (%(state)s)\n" % values
return summary
def default_get(self, cr, uid, fields_list, context=None):
res = {}
res['name'] = '.pickle'
res['summary'] = self._get_summary_value(cr, uid, context)
res['state'] = self._get_state_value(cr, uid, context)
res['msg'] = _("""Before executing the anonymization process, you should make a backup of your database.""")
return res
def fields_view_get(self, cr, uid, view_id=None, view_type='form', context=None, *args, **kwargs):
state = self.pool.get('ir.model.fields.anonymization')._get_global_state(cr, uid, context=context)
if context is None:
context = {}
step = context.get('step', 'new_window')
res = super(ir_model_fields_anonymize_wizard, self).fields_view_get(cr, uid, view_id, view_type, context, *args, **kwargs)
eview = etree.fromstring(res['arch'])
placeholder = eview.xpath("group[@name='placeholder1']")
if len(placeholder):
placeholder = placeholder[0]
if step == 'new_window' and state == 'clear':
# clicked in the menu and the fields are not anonymized: warn the admin that backuping the db is very important
placeholder.addnext(etree.Element('field', {'name': 'msg', 'colspan': '4', 'nolabel': '1'}))
placeholder.addnext(etree.Element('newline'))
placeholder.addnext(etree.Element('label', {'string': 'Warning'}))
eview.remove(placeholder)
elif step == 'new_window' and state == 'anonymized':
# clicked in the menu and the fields are already anonymized
placeholder.addnext(etree.Element('newline'))
placeholder.addnext(etree.Element('field', {'name': 'file_import', 'required': "1"}))
placeholder.addnext(etree.Element('label', {'string': 'Anonymization file'}))
eview.remove(placeholder)
elif step == 'just_anonymized':
# we just ran the anonymization process, we need the file export field
placeholder.addnext(etree.Element('newline'))
placeholder.addnext(etree.Element('field', {'name': 'file_export'}))
# we need to remove the button:
buttons = eview.xpath("button")
for button in buttons:
eview.remove(button)
# and add a message:
placeholder.addnext(etree.Element('field', {'name': 'msg', 'colspan': '4', 'nolabel': '1'}))
placeholder.addnext(etree.Element('newline'))
placeholder.addnext(etree.Element('label', {'string': 'Result'}))
# remove the placeholer:
eview.remove(placeholder)
elif step == 'just_desanonymized':
# we just reversed the anonymization process, we don't need any field
# we need to remove the button
buttons = eview.xpath("button")
for button in buttons:
eview.remove(button)
# and add a message
# and add a message:
placeholder.addnext(etree.Element('field', {'name': 'msg', 'colspan': '4', 'nolabel': '1'}))
placeholder.addnext(etree.Element('newline'))
placeholder.addnext(etree.Element('label', {'string': 'Result'}))
# remove the placeholer:
eview.remove(placeholder)
else:
msg = _("The database anonymization is currently in an unstable state. Some fields are anonymized," + \
" while some fields are not anonymized. You should try to solve this problem before trying to do anything else.")
raise osv.except_osv('Error!', msg)
res['arch'] = etree.tostring(eview)
return res
def _raise_after_history_update(self, cr, uid, history_id, error_type, error_msg):
self.pool.get('ir.model.fields.anonymization.history').write(cr, uid, history_id, {
'state': 'in_exception',
'msg': error_msg,
})
raise osv.except_osv(error_type, error_msg)
def anonymize_database(self, cr, uid, ids, context=None):
"""Sets the 'anonymized' state to defined fields"""
# create a new history record:
anonymization_history_model = self.pool.get('ir.model.fields.anonymization.history')
vals = {
'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
'state': 'started',
'direction': 'clear -> anonymized',
}
history_id = anonymization_history_model.create(cr, uid, vals)
# check that all the defined fields are in the 'clear' state
state = self.pool.get('ir.model.fields.anonymization')._get_global_state(cr, uid, context=context)
if state == 'anonymized':
self._raise_after_history_update(cr, uid, history_id, _('Error !'), _("The database is currently anonymized, you cannot anonymize it again."))
elif state == 'unstable':
msg = _("The database anonymization is currently in an unstable state. Some fields are anonymized," + \
" while some fields are not anonymized. You should try to solve this problem before trying to do anything.")
self._raise_after_history_update(cr, uid, history_id, 'Error !', msg)
# do the anonymization:
dirpath = os.environ.get('HOME') or os.getcwd()
rel_filepath = 'field_anonymization_%s_%s.pickle' % (cr.dbname, history_id)
abs_filepath = os.path.abspath(os.path.join(dirpath, rel_filepath))
ir_model_fields_anonymization_model = self.pool.get('ir.model.fields.anonymization')
field_ids = ir_model_fields_anonymization_model.search(cr, uid, [('state', '<>', 'not_existing')], context=context)
fields = ir_model_fields_anonymization_model.browse(cr, uid, field_ids, context=context)
if not fields:
msg = "No fields are going to be anonymized."
self._raise_after_history_update(cr, uid, history_id, 'Error !', msg)
data = []
for field in fields:
model_name = field.model_id.model
field_name = field.field_id.name
field_type = field.field_id.ttype
table_name = self.pool[model_name]._table
# get the current value
sql = "select id, %s from %s" % (field_name, table_name)
cr.execute(sql)
records = cr.dictfetchall()
for record in records:
data.append({"model_id": model_name, "field_id": field_name, "id": record['id'], "value": record[field_name]})
# anonymize the value:
anonymized_value = None
sid = str(record['id'])
if field_type == 'char':
anonymized_value = 'xxx'+sid
elif field_type == 'selection':
anonymized_value = 'xxx'+sid
elif field_type == 'text':
anonymized_value = 'xxx'+sid
elif field_type == 'boolean':
anonymized_value = random.choice([True, False])
elif field_type == 'date':
anonymized_value = '2011-11-11'
elif field_type == 'datetime':
anonymized_value = '2011-11-11 11:11:11'
elif field_type == 'float':
anonymized_value = 0.0
elif field_type == 'integer':
anonymized_value = 0
elif field_type in ['binary', 'many2many', 'many2one', 'one2many', 'reference']: # cannot anonymize these kind of fields
msg = _("Cannot anonymize fields of these types: binary, many2many, many2one, one2many, reference.")
self._raise_after_history_update(cr, uid, history_id, 'Error !', msg)
if anonymized_value is None:
self._raise_after_history_update(cr, uid, history_id, _('Error !'), _("Anonymized value is None. This cannot happens."))
sql = "update %(table)s set %(field)s = %%(anonymized_value)s where id = %%(id)s" % {
'table': table_name,
'field': field_name,
}
cr.execute(sql, {
'anonymized_value': anonymized_value,
'id': record['id']
})
# save pickle:
fn = open(abs_filepath, 'w')
pickle.dump(data, fn, pickle.HIGHEST_PROTOCOL)
# update the anonymization fields:
values = {
'state': 'anonymized',
}
ir_model_fields_anonymization_model.write(cr, uid, field_ids, values, context=context)
# add a result message in the wizard:
msgs = ["Anonymization successful.",
"",
"Donot forget to save the resulting file to a safe place because you will not be able to revert the anonymization without this file.",
"",
"This file is also stored in the %s directory. The absolute file path is: %s.",
]
msg = '\n'.join(msgs) % (dirpath, abs_filepath)
fn = open(abs_filepath, 'r')
self.write(cr, uid, ids, {
'msg': msg,
'file_export': base64.encodestring(fn.read()),
})
fn.close()
# update the history record:
anonymization_history_model.write(cr, uid, history_id, {
'field_ids': [[6, 0, field_ids]],
'msg': msg,
'filepath': abs_filepath,
'state': 'done',
})
# handle the view:
view_id = self._id_get(cr, uid, 'ir.ui.view', 'view_ir_model_fields_anonymize_wizard_form', 'anonymization')
return {
'res_id': ids[0],
'view_id': [view_id],
'view_type': 'form',
"view_mode": 'form',
'res_model': 'ir.model.fields.anonymize.wizard',
'type': 'ir.actions.act_window',
'context': {'step': 'just_anonymized'},
'target':'new',
}
def reverse_anonymize_database(self, cr, uid, ids, context=None):
"""Set the 'clear' state to defined fields"""
ir_model_fields_anonymization_model = self.pool.get('ir.model.fields.anonymization')
anonymization_history_model = self.pool.get('ir.model.fields.anonymization.history')
# create a new history record:
vals = {
'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
'state': 'started',
'direction': 'anonymized -> clear',
}
history_id = anonymization_history_model.create(cr, uid, vals)
# check that all the defined fields are in the 'anonymized' state
state = ir_model_fields_anonymization_model._get_global_state(cr, uid, context=context)
if state == 'clear':
raise osv.except_osv_('Error!', "The database is not currently anonymized, you cannot reverse the anonymization.")
elif state == 'unstable':
msg = _("The database anonymization is currently in an unstable state. Some fields are anonymized," + \
" while some fields are not anonymized. You should try to solve this problem before trying to do anything.")
raise osv.except_osv('Error!', msg)
wizards = self.browse(cr, uid, ids, context=context)
for wizard in wizards:
if not wizard.file_import:
msg = _("It is not possible to reverse the anonymization process without supplying the anonymization export file.")
self._raise_after_history_update(cr, uid, history_id, 'Error !', msg)
# reverse the anonymization:
# load the pickle file content into a data structure:
data = pickle.loads(base64.decodestring(wizard.file_import))
migration_fix_obj = self.pool.get('ir.model.fields.anonymization.migration.fix')
fix_ids = migration_fix_obj.search(cr, uid, [('target_version', '=', '8.0')])
fixes = migration_fix_obj.read(cr, uid, fix_ids, ['model_name', 'field_name', 'query', 'query_type', 'sequence'])
fixes = group(fixes, ('model_name', 'field_name'))
for line in data:
queries = []
table_name = self.pool[line['model_id']]._table if line['model_id'] in self.pool else None
# check if custom sql exists:
key = (line['model_id'], line['field_id'])
custom_updates = fixes.get(key)
if custom_updates:
custom_updates.sort(key=itemgetter('sequence'))
queries = [(record['query'], record['query_type']) for record in custom_updates if record['query_type']]
elif table_name:
queries = [("update %(table)s set %(field)s = %%(value)s where id = %%(id)s" % {
'table': table_name,
'field': line['field_id'],
}, 'sql')]
for query in queries:
if query[1] == 'sql':
sql = query[0]
cr.execute(sql, {
'value': line['value'],
'id': line['id']
})
elif query[1] == 'python':
raw_code = query[0]
code = raw_code % line
eval(code)
else:
raise Exception("Unknown query type '%s'. Valid types are: sql, python." % (query['query_type'], ))
# update the anonymization fields:
ir_model_fields_anonymization_model = self.pool.get('ir.model.fields.anonymization')
field_ids = ir_model_fields_anonymization_model.search(cr, uid, [('state', '<>', 'not_existing')], context=context)
values = {
'state': 'clear',
}
ir_model_fields_anonymization_model.write(cr, uid, field_ids, values, context=context)
# add a result message in the wizard:
msg = '\n'.join(["Successfully reversed the anonymization.",
"",
])
self.write(cr, uid, ids, {'msg': msg})
# update the history record:
anonymization_history_model.write(cr, uid, history_id, {
'field_ids': [[6, 0, field_ids]],
'msg': msg,
'filepath': False,
'state': 'done',
})
# handle the view:
view_id = self._id_get(cr, uid, 'ir.ui.view', 'view_ir_model_fields_anonymize_wizard_form', 'anonymization')
return {
'res_id': ids[0],
'view_id': [view_id],
'view_type': 'form',
"view_mode": 'form',
'res_model': 'ir.model.fields.anonymize.wizard',
'type': 'ir.actions.act_window',
'context': {'step': 'just_desanonymized'},
'target':'new',
}
def _id_get(self, cr, uid, model, id_str, mod):
if '.' in id_str:
mod, id_str = id_str.split('.')
try:
idn = self.pool.get('ir.model.data')._get_id(cr, uid, mod, id_str)
res = int(self.pool.get('ir.model.data').read(cr, uid, [idn], ['res_id'])[0]['res_id'])
except:
res = None
return res
class ir_model_fields_anonymization_migration_fix(osv.osv):
_name = 'ir.model.fields.anonymization.migration.fix'
_order = "sequence"
_columns = {
'target_version': fields.char('Target Version'),
'model_name': fields.char('Model'),
'field_name': fields.char('Field'),
'query': fields.text('Query'),
'query_type': fields.selection(string='Query', selection=[('sql', 'sql'), ('python', 'python')]),
'sequence': fields.integer('Sequence'),
}
# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: