# -*- encoding: utf-8 -*-
##############################################################################
#
#    OpenERP, Open Source Management Solution
#
#    Copyright (C) P. Christeas, 2009, all rights reserved
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################

from osv import osv, fields
import os
import tools
import base64
from tools.misc import ustr
from tools.translate import _

from osv.orm import except_orm

import random
import string
import netsvc
from content_index import cntIndex


""" The algorithm of data storage
|
||
|
|
||
|
We have to consider 3 cases of data /retrieval/:
|
||
|
Given (context,path) we need to access the file (aka. node).
|
||
|
given (directory, context), we need one of its children (for listings, views)
|
||
|
given (ir.attachment, context), we needs its data and metadata (node).
|
||
|
|
||
|
For data /storage/ we have the cases:
|
||
|
Have (ir.attachment, context), we modify the file (save, update, rename etc).
|
||
|
Have (directory, context), we create a file.
|
||
|
Have (path, context), we create or modify a file.
|
||
|
|
||
|
Note that in all above cases, we don't explicitly choose the storage media,
|
||
|
but always require a context to be present.
|
||
|
|
||
|
Note that a node will not always have a corresponding ir.attachment. Dynamic
|
||
|
nodes, for once, won't. Their metadata will be computed by the parent storage
|
||
|
media + directory.
|
||
|
|
||
|
The algorithm says that in any of the above cases, our first goal is to locate
|
||
|
the node for any combination of search criteria. It would be wise NOT to
|
||
|
represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
|
||
|
but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
|
||
|
|
||
|
We also contain all the parenting loop code in one function. This is intentional,
|
||
|
because one day this will be optimized in the db (Pg 8.4).
|
||
|
|
||
|
|
||
|
"""

def random_name():
    """ Return a random 10-character name, usable as a filename. """
    random.seed()
    d = [random.choice(string.ascii_letters) for x in xrange(10)]
    name = "".join(d)
    return name

# Characters that are unsafe in file names, each mapped to a replacement
# string ('/' maps to '__', the rest to the hash of the character).
INVALID_CHARS = {
    '*': str(hash('*')), '|': str(hash('|')), "\\": str(hash("\\")),
    '/': '__', ':': str(hash(':')), '"': str(hash('"')),
    '<': str(hash('<')), '>': str(hash('>')), '?': str(hash('?')),
}
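
# A possible use of INVALID_CHARS (an illustrative sketch, not part of the
# original API): sanitize a user-supplied name before it becomes a filename.
def _sanitize_name_example(name):
    for char, repl in INVALID_CHARS.items():
        name = name.replace(char, repl)
    return name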


def create_directory(path):
    """ Create a randomly-named subdirectory under path, return its name. """
    dir_name = random_name()
    path = os.path.join(path, dir_name)
    os.makedirs(path)
    return dir_name


class document_storage(osv.osv):
    """ The primary object for data storage.

    Each instance of this object is a storage medium, in which our application
    can store contents. The object here controls the behaviour of the storage
    medium.
    The referring document.directory-ies will control the placement of data
    into the storage.

    It is a bad idea to have multiple document.storage objects pointing to
    the same tree of filesystem storage.
    """
    _name = 'document.storage'
    _description = 'Document storage media'
    _columns = {
        'name': fields.char('Name', size=64, required=True, select=1),
        'write_date': fields.datetime('Date Modified', readonly=True),
        'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
        'create_date': fields.datetime('Date Created', readonly=True),
        'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
        'user_id': fields.many2one('res.users', 'Owner'),
        'group_ids': fields.many2many('res.groups', 'document_directory_group_rel', 'item_id', 'group_id', 'Groups'),
        'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
        'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
                ('realstore', 'External file storage'), ('virtual', 'Virtual storage')], 'Type', required=True),
        'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
        'online': fields.boolean('Online', help="If not checked, the medium is currently offline and its contents are not available", required=True),
        'readonly': fields.boolean('Read Only', help="If set, the medium is for reading only"),
    }

    def _get_rootpath(self, cr, uid, context=None):
        # Default root: <server root_path>/filestore/<database name>
        return os.path.join(tools.config['root_path'], 'filestore', cr.dbname)

    _defaults = {
        'user_id': lambda self, cr, uid, ctx: uid,
        'online': lambda *args: True,
        'readonly': lambda *args: False,
        # Note: the defaults below should only be used ONCE for the default
        # storage medium. All other times, we should create different paths at least.
        'type': lambda *args: 'filestore',
        'path': _get_rootpath,
    }
    _sql_constraints = [
        # SQL note: a path = NULL doesn't have to be unique.
        ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!"),
    ]
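
    # Illustrative example (the values are assumptions, not shipped data) of
    # defining an extra filestore medium from server-side code:
    #   storage_obj = self.pool.get('document.storage')
    #   sid = storage_obj.create(cr, uid, {'name': 'Archive',
    #       'type': 'filestore', 'path': '/srv/openerp/archive'})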

    def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
        """ Retrieve the contents of some file_node having storage_id = id.
        Optionally, fil_obj could point to the browse object of the file
        (ir.attachment).
        """
        if not context:
            context = {}
        boo = self.browse(cr, uid, id, context)
        if fil_obj:
            ira = fil_obj
        else:
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        return self.__get_data_3(cr, uid, boo, ira, context)

    def __get_data_3(self, cr, uid, boo, ira, context):
        if not boo.online:
            raise RuntimeError('media offline')
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                if ira.file_size:
                    netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
                return None
            fpath = os.path.join(boo.path, ira.store_fname)
            netsvc.Logger().notifyChannel('document', netsvc.LOG_DEBUG, "Trying to read %s" % fpath)
            fp = file(fpath, 'rb')
            try:
                return fp.read()
            finally:
                fp.close()
        elif boo.type == 'db':
            # TODO: we need a better api for large files
            if ira.db_datas:
                out = base64.decodestring(ira.db_datas)
            else:
                out = ''
            return out
        elif boo.type == 'realstore':
            # fpath = os.path.join(boo.path,
            return None
        else:
            raise TypeError("No %s storage" % boo.type)

    def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
        """ Store the data.
        This function MUST be used from an ir.attachment. It wouldn't make sense
        to store things persistently for other types (dynamic).
        """
        if not context:
            context = {}
        boo = self.browse(cr, uid, id, context)
        logger = netsvc.Logger()
        if fil_obj:
            ira = fil_obj
        else:
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)

        if not boo.online:
            raise RuntimeError('media offline')
        logger.notifyChannel('document', netsvc.LOG_DEBUG, "Store data for ir.attachment #%d" % ira.id)
        store_fname = None
        fname = None
        if boo.type == 'filestore':
            path = boo.path
            try:
                flag = None
                # This can be improved: pick the first subdirectory that still
                # has room (fewer than 4000 entries), instead of scanning all.
                for dirs in os.listdir(path):
                    if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
                        flag = dirs
                        break
                flag = flag or create_directory(path)
                filename = random_name()
                fname = os.path.join(path, flag, filename)
                fp = file(fname, 'wb')
                fp.write(data)
                fp.close()
                logger.notifyChannel('document', netsvc.LOG_DEBUG, "Saved data to %s" % fname)
                filesize = len(data) # os.stat(fname).st_size
                store_fname = os.path.join(flag, filename)

                # TODO Here, an old file would be left hanging.

            except Exception, e:
                logger.notifyChannel('document', netsvc.LOG_WARNING, "Couldn't save data: %s" % str(e))
                raise except_orm(_('Error!'), str(e))
        elif boo.type == 'db':
            filesize = len(data)
            # will that work for huge data? TODO
            out = base64.encodestring(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                       (out, file_node.file_id))
        else:
            raise TypeError("No %s storage" % boo.type)

        # 2nd phase: store the metadata
        try:
            icont = ''
            mime = ira.file_type
            try:
                mime, icont = cntIndex.doIndex(data, ira.datas_fname,
                                               ira.file_type or None, fname)
            except Exception, e:
                logger.notifyChannel('document', netsvc.LOG_DEBUG, 'Cannot index file: %s' % str(e))

            # a hack: /assume/ that the calling write operation will not try
            # to write the fname and size, and update them in the db concurrently.
            # We cannot use a write() here, because we are already in one.
            cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
                       (store_fname, filesize, ustr(icont), mime, file_node.file_id))
            file_node.content_length = filesize
            file_node.content_type = mime
            return True
        except Exception, e:
            netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Couldn't save data: %s" % str(e))
            # should we really rollback once we have written the actual data?
            # at the db case (only), that rollback would be safe
            raise except_orm(_('Error at doc write!'), str(e))
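
    # Illustrative usage sketch (the calling convention is assumed, this is
    # not shipped code): given an ir.attachment browse record `ira` stored on
    # medium `sid`, and a node object carrying file_id as used above:
    #   storage_obj = pool.get('document.storage')
    #   storage_obj.set_data(cr, uid, sid, node, 'some bytes', fil_obj=ira)
    #   data = storage_obj.get_data(cr, uid, sid, node, fil_obj=ira)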

    def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
        """ Before we unlink a file (fil_bo), prepare the list of real
        files that have to be removed, too. """

        if not storage_bo.online:
            raise RuntimeError('media offline')

        if storage_bo.type == 'filestore':
            fname = fil_bo.store_fname
            if not fname:
                return None
            path = storage_bo.path
            return (storage_bo.id, 'file', os.path.join(path, fname))
        elif storage_bo.type == 'db':
            return None
        else:
            raise TypeError("No %s storage" % storage_bo.type)

    def do_unlink(self, cr, uid, unres):
        for id, ktype, fname in unres:
            if ktype == 'file':
                try:
                    os.unlink(fname)
                except Exception, e:
                    netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Could not remove file %s, please remove it manually: %s" % (fname, e))
            else:
                netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Unknown unlink key %s" % ktype)

        return True
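
    # Illustrative two-phase unlink sketch (assumed caller, not shipped code):
    # collect the real files first, drop the db rows, then remove the files,
    # so a failed db operation never orphans attachments from their data.
    #   unres = []
    #   for f in attachments:
    #       r = storage_obj.prepare_unlink(cr, uid, f.storage_id, f)
    #       if r:
    #           unres.append(r)
    #   attachment_obj.unlink(cr, uid, [f.id for f in attachments])
    #   storage_obj.do_unlink(cr, uid, unres)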


document_storage()


#eof