# -*- encoding: utf-8 -*-
##############################################################################
#
#    OpenERP, Open Source Management Solution
#    Copyright (C) 2004-2009 Tiny SPRL (). All Rights Reserved
#    $Id$
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see .
#
##############################################################################

import os
import StringIO
import subprocess
import tempfile
import time

import odt2txt


#
# This should be the indexer
#
def _to_unicode(s):
    """Return ``s`` converted to text on a best-effort basis.

    The conversions are tried in this order and the first one that does not
    raise ``UnicodeError`` wins:

    1. ``s.decode('utf-8')``
    2. ``s.decode('latin')``
    3. ``s.encode('ascii')``

    If all three fail, ``s`` is returned unchanged.
    """
    try:
        return s.decode('utf-8')
    except UnicodeError:
        pass
    try:
        return s.decode('latin')
    except UnicodeError:
        pass
    try:
        return s.encode('ascii')
    except UnicodeError:
        return s


def _run_filter(argv, data=None):
    """Run the external converter ``argv`` and return what it wrote to stdout.

    ``data``, when given, is fed to the program's standard input.
    ``communicate()`` interleaves the write and the read, so a large document
    cannot dead-lock on a full pipe, and both pipes are closed afterwards.

    An empty string is returned when the program cannot be started (for
    example because it is not installed): indexing is best-effort and a
    missing converter must not abort the caller.
    """
    stdin_spec = None
    if data is not None:
        stdin_spec = subprocess.PIPE
    try:
        proc = subprocess.Popen(argv, stdin=stdin_spec, stdout=subprocess.PIPE)
    except OSError:
        return ''
    output, _unused_err = proc.communicate(data)
    return output


def content_index(content, filename=None, content_type=None):
    """Extract the indexable text of a document.

    The converter is selected from the extension of ``filename``
    (the comparison is case-sensitive):

    * ``.doc``                        -> piped through ``antiword -``
    * ``.pdf``                        -> written to a temporary file and
                                         converted with ``pdftotext``
    * ``.xls``/``.ods``/``.odt``/``.odp`` -> ``odt2txt.OpenDocumentTextFile``
    * ``.txt``/``.py``/``.patch``/``.html``/``.csv``/``.xml`` -> returned as is

    :param content: raw document data (a byte string)
    :param filename: name of the document; only its extension is used.
                     ``None`` or a name without a known extension yields ``''``.
    :param content_type: currently unused
    :return: the extracted text, or ``''`` when the type is not supported.
             The ``.doc`` and OpenDocument branches pass their output through
             ``_to_unicode``; the ``.pdf`` branch returns the raw output of
             ``pdftotext`` and the plain-text branch returns ``content``
             untouched.
    """
    # filename defaults to None, which os.path.splitext() cannot handle.
    fname, ext = os.path.splitext(filename or '')
    result = ''
    # NOTE: this used to read ``ext in ('.doc')``, which is a *substring* test
    # against the string '.doc' and therefore also matched '' (no extension),
    # '.d', '.do', ...
    if ext == '.doc':  # or content_type ?
        result = _to_unicode(_run_filter(['antiword', '-'], content))
    elif ext == '.pdf':
        file_descriptor, file_name = tempfile.mkstemp(suffix=ext)
        try:
            tmp_file = os.fdopen(file_descriptor, 'wb')
            try:
                tmp_file.write(content)
            finally:
                tmp_file.close()
            result = _run_filter(
                ['pdftotext', '-enc', 'UTF-8', '-nopgbrk', file_name, '-'])
        finally:
            # mkstemp() never removes the file on its own.
            os.unlink(file_name)
    elif ext in ('.xls', '.ods', '.odt', '.odp'):
        # NOTE(review): '.xls' is routed to the OpenDocument reader as in the
        # original code -- confirm odt2txt really copes with that format.
        s = StringIO.StringIO(content)
        try:
            o = odt2txt.OpenDocumentTextFile(s)
            result = _to_unicode(o.toString())
        finally:
            s.close()
    elif ext in ('.txt', '.py', '.patch', '.html', '.csv', '.xml'):
        result = content
    return result

# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: