[IMP] document: support indexing of .docx files to text and .ppt/.pptx files to HTML
bzr revid: ysa@tinyerp.co.in-20100311085600-4t4gmuzywglpugh0
This commit is contained in:
parent
e7fb07589f
commit
d217272611
|
@ -21,7 +21,7 @@
|
||||||
|
|
||||||
import StringIO
|
import StringIO
|
||||||
import odt2txt
|
import odt2txt
|
||||||
|
from subprocess import Popen, PIPE
|
||||||
from content_index import indexer, cntIndex
|
from content_index import indexer, cntIndex
|
||||||
|
|
||||||
|
|
||||||
|
@ -50,15 +50,28 @@ class TxtIndex(indexer):
|
||||||
|
|
||||||
cntIndex.register(TxtIndex())
|
cntIndex.register(TxtIndex())
|
||||||
|
|
||||||
|
class PptIndex(indexer):
    """Indexer that extracts presentation content by running ``ppthtml``."""

    def _getMimeTypes(self):
        """Return the MIME types handled by this indexer."""
        # Previously 'application/ms-word', the type DocIndex declares;
        # presentations need their own MIME type to be routed here.
        return ['application/vnd.ms-powerpoint']

    def _getExtensions(self):
        """Return the file extensions handled by this indexer."""
        # NOTE(review): ppthtml is presumably limited to the legacy binary
        # .ppt format -- confirm it can actually read .pptx files.
        return ['.ppt', '.pptx']

    def _doIndexFile(self, fname):
        """Run ``ppthtml`` on *fname* and return its output via ``_to_unicode``.

        :param fname: path of the file passed to the ``ppthtml`` command
        :return: the command's standard output, passed through ``_to_unicode``
        """
        proc = Popen(['ppthtml', fname], shell=False, stdout=PIPE)
        # communicate() reads stdout to EOF, closes the pipe and waits for
        # the child, so no unreaped process or open descriptor is left over.
        output, _ = proc.communicate()
        return _to_unicode(output)
|
||||||
|
|
||||||
|
cntIndex.register(PptIndex())
|
||||||
|
|
||||||
class DocIndex(indexer):
    """Indexer that extracts text from Word documents by running ``antiword``."""

    def _getMimeTypes(self):
        """Return the MIME types handled by this indexer."""
        return ['application/ms-word']

    def _getExtensions(self):
        """Return the file extensions handled by this indexer."""
        # NOTE(review): antiword presumably only understands the legacy
        # binary .doc format -- confirm that .docx files yield any text.
        return ['.doc', '.docx']

    def _doIndexFile(self, fname):
        """Run ``antiword`` on *fname* and return its output via ``_to_unicode``.

        :param fname: path of the file passed to the ``antiword`` command
        :return: the command's standard output, passed through ``_to_unicode``
        """
        proc = Popen(['antiword', fname], shell=False, stdout=PIPE)
        # communicate() reads stdout to EOF, closes the pipe and waits for
        # the child, so no unreaped process or open descriptor is left over.
        output, _ = proc.communicate()
        return _to_unicode(output)
|
||||||
|
|
||||||
cntIndex.register(DocIndex())
|
cntIndex.register(DocIndex())
|
||||||
|
|
Loading…
Reference in New Issue