From 54d30d5194117e2e2484a199bc66464224bd1152 Mon Sep 17 00:00:00 2001 From: Fabien Pinckaers Date: Sun, 11 May 2014 13:52:31 +0200 Subject: [PATCH] [IMP] clean sitemap, enumerate pages + fixes bzr revid: fp@tinyerp.com-20140511115231-g8ke14r9iepyypv4 --- addons/website/controllers/main.py | 35 ++++++--- addons/website/data/data.xml | 2 +- addons/website/models/ir_http.py | 41 +++++----- addons/website/models/website.py | 78 +++++++------------ addons/website/tests/test_requests.py | 2 +- addons/website/views/website_templates.xml | 40 +++++----- addons/website_blog/controllers/main.py | 4 +- addons/website_blog/models/website_blog.py | 2 - .../controllers/main.py | 1 + addons/website_forum/controllers/main.py | 6 +- addons/website_forum_doc/controllers/main.py | 6 +- addons/website_mail_group/controllers/main.py | 2 +- addons/website_quote/controllers/main.py | 4 +- addons/website_sale/controllers/main.py | 2 + 14 files changed, 112 insertions(+), 113 deletions(-) diff --git a/addons/website/controllers/main.py b/addons/website/controllers/main.py index eade32ec020..7880603d720 100644 --- a/addons/website/controllers/main.py +++ b/addons/website/controllers/main.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) # Completely arbitrary limits MAX_IMAGE_WIDTH, MAX_IMAGE_HEIGHT = IMAGE_LIMITS = (1024, 768) - +LOC_PER_SITEMAP = 45000 class Website(openerp.addons.web.controllers.main.Home): #------------------------------------------------------ @@ -69,20 +69,34 @@ class Website(openerp.addons.web.controllers.main.Home): return request.render(page, values) - @http.route(['/robots.txt'], type='http', auth="public", website=True) + @http.route(['/robots.txt'], type='http', auth="public") def robots(self): return request.render('website.robots', {'url_root': request.httprequest.url_root}, mimetype='text/plain') - @http.route('/sitemap', type='http', auth='public', website=True, multilang=True) - def sitemap(self): - return request.render('website.sitemap', { - 
'pages': request.website.enumerate_pages() - }) - @http.route('/sitemap.xml', type='http', auth="public", website=True) - def sitemap_xml(self): + def sitemap_xml_index(self): + pages = list(request.website.enumerate_pages()) + if len(pages)<=LOC_PER_SITEMAP: + return self.__sitemap_xml(pages, 0) + # Sitemaps must be split in several smaller files with a sitemap index values = { - 'pages': request.website.enumerate_pages() + 'pages': range(len(pages)/LOC_PER_SITEMAP+1), + 'url_root': request.httprequest.url_root + } + headers = { + 'Content-Type': 'application/xml;charset=utf-8', + } + return request.render('website.sitemap_index_xml', values, headers=headers) + + @http.route('/sitemap-.xml', type='http', auth="public", website=True) + def sitemap_xml(self, page): + pages = list(request.website.enumerate_pages()) + return self.__sitemap_xml(pages, page) + + def __sitemap_xml(self, pages, index=0): + values = { + 'pages': pages[index*LOC_PER_SITEMAP:(index+1)*LOC_PER_SITEMAP], + 'url_root': request.httprequest.url_root.rstrip('/') } headers = { 'Content-Type': 'application/xml;charset=utf-8', @@ -428,4 +442,3 @@ class Website(openerp.addons.web.controllers.main.Home): return res return request.redirect('/') -# vim:et: diff --git a/addons/website/data/data.xml b/addons/website/data/data.xml index 4eff1b8a60d..c5254d65c0f 100644 --- a/addons/website/data/data.xml +++ b/addons/website/data/data.xml @@ -15,7 +15,7 @@ Home - / + /page/homepage 10 diff --git a/addons/website/models/ir_http.py b/addons/website/models/ir_http.py index 2a774939c60..d90b40bf15e 100644 --- a/addons/website/models/ir_http.py +++ b/addons/website/models/ir_http.py @@ -178,8 +178,9 @@ class ir_http(orm.AbstractModel): return super(ir_http, self)._handle_exception(exception) class ModelConverter(ir.ir_http.ModelConverter): - def __init__(self, url_map, model=False): + def __init__(self, url_map, model=False, domain='[]'): super(ModelConverter, self).__init__(url_map, model) + self.domain = domain 
self.regex = r'(?:[A-Za-z0-9-_]+?-)?(\d+)(?=$|/)' def to_url(self, value): @@ -191,24 +192,28 @@ class ModelConverter(ir.ir_http.ModelConverter): return request.registry[self.model].browse( request.cr, _uid, int(m.group(1)), context=request.context) - def generate(self, cr, uid, query=None, context=None): - return request.registry[self.model].name_search( - cr, uid, name=query or '', context=context) + def generate(self, cr, uid, query=None, args=None, context=None): + for record in request.registry[self.model].name_search( + cr, uid, name=query or '', args=eval( self.domain, (args or {}).copy()), + context=context): + yield {'loc': record} class PageConverter(werkzeug.routing.PathConverter): - """ Only point of this converter is to bundle pages enumeration logic - - Sads got: no way to get the view's human-readable name even if one exists - """ - def generate(self, cr, uid, query=None, context=None): + """ Only point of this converter is to bundle pages enumeration logic """ + def generate(self, cr, uid, query=None, args={}, context=None): View = request.registry['ir.ui.view'] - views = View.search_read( - cr, uid, [['page', '=', True]], - fields=[], order='name', context=context) - xids = View.get_external_id( - cr, uid, [view['id'] for view in views], context=context) - + views = View.search_read(cr, uid, [['page', '=', True]], + fields=['xml_id','priority','write_date'], order='name', context=context) for view in views: - xid = xids[view['id']] - if xid and (not query or query.lower() in xid.lower()): - yield xid + xid = view['xml_id'].startswith('website.') and view['xml_id'][8:] or view['xml_id'] + # the 'page/homepage' url is indexed as '/', avoid having the same page referenced twice + # when we will have an url mapping mechanism, replace this by a rule: page/homepage --> / + if xid=='homepage': continue + if query and query.lower() not in xid.lower(): + continue + record = {'loc': xid} + if view['priority'] <> 16: + record['__priority'] = 
min(round(view['priority'] / 32.0,1), 1) + if view['write_date']: + record['__lastmod'] = view['write_date'][:10] + yield record diff --git a/addons/website/models/website.py b/addons/website/models/website.py index fcaa184d86e..40a296a1a21 100644 --- a/addons/website/models/website.py +++ b/addons/website/models/website.py @@ -283,44 +283,23 @@ class website(osv.osv): endpoint = rule.endpoint methods = rule.methods or ['GET'] converters = rule._converters.values() - - return ( - 'GET' in methods + if not ('GET' in methods and endpoint.routing['type'] == 'http' and endpoint.routing['auth'] in ('none', 'public') and endpoint.routing.get('website', False) - # preclude combinatorial explosion by only allowing a single converter - and len(converters) <= 1 - # ensure all converters on the rule are able to generate values for - # themselves and all(hasattr(converter, 'generate') for converter in converters) - ) and self.endpoint_is_enumerable(rule) - - def endpoint_is_enumerable(self, rule): - """ Verifies that it's possible to generate a valid url for the rule's - endpoint - - :type rule: werkzeug.routing.Rule - :rtype: bool - """ - spec = inspect.getargspec(rule.endpoint.method) - - # if *args bail the fuck out, only dragons can live there - if spec.varargs: + and endpoint.routing.get('website')): return False - # remove all arguments with a default value from the list - defaults_count = len(spec.defaults or []) # spec.defaults can be None - # a[:-0] ~ a[:0] ~ [] -> replace defaults_count == 0 by None to get - # a[:None] ~ a - args = spec.args[:(-defaults_count or None)] + # don't list routes having an argument with neither a default value nor a converter + spec = inspect.getargspec(endpoint.method.original_func) - # params with defaults were removed, leftover allowed are: - # * self (technically should be first-parameter-of-instance-method but whatever) - # * any parameter mapping to a converter - return all( - (arg == 'self' or arg in rule._converters) - for arg in args) + # 
remove self and arguments having a default value + defaults_count = len(spec.defaults or []) + args = spec.args[1:(-defaults_count or None)] + + # check that all args have a converter + return all( (arg in rule._converters) for arg in args) def enumerate_pages(self, cr, uid, ids, query_string=None, context=None): """ Available pages in the website/CMS. This is mostly used for links @@ -344,27 +323,30 @@ class website(osv.osv): if not self.rule_is_enumerable(rule): continue - converters = rule._converters - filtered = bool(converters) - if converters: - # allow single converter as decided by fp, checked by - # rule_is_enumerable - [(name, converter)] = converters.items() - converter_values = converter.generate( - request.cr, uid, query=query_string, context=context) - generated = ({k: v} for k, v in itertools.izip( - itertools.repeat(name), converter_values)) - else: - # force single iteration for literal urls - generated = [{}] + converters = rule._converters or {} + values = [{}] + for (name, converter) in converters.items(): + newval = [] + for val in values: + for v in converter.generate(request.cr, uid, query=query_string, args=val, context=context): + newval.append( val.copy() ) + v[name] = v['loc'] + del v['loc'] + newval[-1].update(v) + values = newval - for values in generated: - domain_part, url = rule.build(values, append_unknown=False) - page = {'name': url, 'url': url} + for value in values: + domain_part, url = rule.build(value, append_unknown=False) + page = {'loc': url} + for key,val in value.items(): + if key.startswith('__'): + page[key[2:]] = val + if url in ('/sitemap.xml',): + continue if url in url_list: continue url_list.append(url) - if not filtered and query_string and not self.page_matches(cr, uid, page, query_string, context=context): + if query_string and not self.page_matches(cr, uid, page, query_string, context=context): continue yield page diff --git a/addons/website/tests/test_requests.py b/addons/website/tests/test_requests.py index 
127aea274d5..2d14cbc668d 100644 --- a/addons/website/tests/test_requests.py +++ b/addons/website/tests/test_requests.py @@ -93,7 +93,7 @@ class CrawlSuite(unittest2.TestSuite): # switch registry to test mode, so that requests can be made registry.enter_test_mode() - paths = [URL('/'), URL('/sitemap')] + paths = [URL('/')] seen = set(paths) while paths: diff --git a/addons/website/views/website_templates.xml b/addons/website/views/website_templates.xml index 0b99d7a892c..890d6080039 100644 --- a/addons/website/views/website_templates.xml +++ b/addons/website/views/website_templates.xml @@ -39,7 +39,7 @@ ((submenu.url != '/' and request.httprequest.path.startswith(submenu.url)) or request.httprequest.path == submenu.url) and 'active' "> - + @@ -470,7 +470,6 @@

This page does not exist, but you can create it as you are an administrator of this site.

Create Page - or Search a Page
Edit the content below this line to adapt the default "page not found" page.
@@ -706,34 +705,33 @@ -