[IMP] document: support indexing of .docx as text and .pptx as HTML

bzr revid: ysa@tinyerp.co.in-20100311085600-4t4gmuzywglpugh0
This commit is contained in:
Ysa (Open ERP) 2010-03-11 14:26:00 +05:30
parent e7fb07589f
commit d217272611
1 changed file with 16 additions and 3 deletions

View File

@ -21,7 +21,7 @@
import StringIO
import odt2txt
from subprocess import Popen, PIPE
from content_index import indexer, cntIndex
@ -50,15 +50,28 @@ class TxtIndex(indexer):
# Register a TxtIndex instance with cntIndex (presumably keyed by the MIME
# types and extensions the class declares -- confirm in content_index).
cntIndex.register(TxtIndex())
class PptIndex(indexer):
    """Indexer that extracts PowerPoint content by running ``ppthtml``."""

    def _getMimeTypes(self):
        """Return the list of MIME types this indexer declares."""
        # This list previously held 'application/ms-word', the Word type that
        # DocIndex also declares; PowerPoint files use the type below.
        return ['application/vnd.ms-powerpoint']

    def _getExtensions(self):
        """Return the list of file extensions this indexer declares."""
        # NOTE(review): '.pptx' files are handed to ppthtml as well; confirm
        # that ppthtml can actually read the Office Open XML format.
        return ['.ppt', '.pptx']

    def _doIndexFile(self, fname):
        """Run ``ppthtml`` on ``fname`` and return its converted output.

        :param fname: path of the file passed to ``ppthtml``.
        :return: the command's standard output, passed through
            ``_to_unicode``.
        :raises OSError: if the ``ppthtml`` executable cannot be started.
        """
        proc = Popen(['ppthtml', fname], shell=False, stdout=PIPE)
        # communicate() reads stdout to EOF, closes the pipe and waits for
        # the child, so no file descriptor or zombie process is left behind.
        output = proc.communicate()[0]
        return _to_unicode(output)
# Register a PptIndex instance with cntIndex (presumably keyed by the MIME
# types and extensions the class declares -- confirm in content_index).
cntIndex.register(PptIndex())
class DocIndex(indexer):
    """Indexer that extracts Word document text by running ``antiword``."""

    def _getMimeTypes(self):
        """Return the list of MIME types this indexer declares."""
        # NOTE(review): the IANA-registered spelling is 'application/msword';
        # confirm whether lookups ever use that form before relying on this.
        return ['application/ms-word']

    def _getExtensions(self):
        """Return the list of file extensions this indexer declares."""
        # NOTE(review): '.docx' files are handed to antiword as well; confirm
        # that antiword can actually read the Office Open XML format.
        return ['.doc', '.docx']

    def _doIndexFile(self, fname):
        """Run ``antiword`` on ``fname`` and return the extracted text.

        :param fname: path of the file passed to ``antiword``.
        :return: the command's standard output, passed through
            ``_to_unicode``.
        :raises OSError: if the ``antiword`` executable cannot be started.
        """
        # The command is launched exactly once.
        proc = Popen(['antiword', fname], shell=False, stdout=PIPE)
        # communicate() reads stdout to EOF, closes the pipe and waits for
        # the child, so no file descriptor or zombie process is left behind.
        output = proc.communicate()[0]
        return _to_unicode(output)
# Register a DocIndex instance with cntIndex (presumably keyed by the MIME
# types and extensions the class declares -- confirm in content_index).
cntIndex.register(DocIndex())