[FIX] read_group: remove prohibitive O(n^2) operations that could freeze the server on large group_by results

This patch replaces two O(n^2) operations, one filter and one sort, with O(n) or better operations. It uses slightly more memory because it keeps one extra copy of the data, but it processes even 600k result lines in virtually no time, whereas the previous code would simply freeze the server for a long time. (bzr revid: odo@openerp.com-20120609010549-xdktw6xte7fmxjbq)
2012-06-09 03:05:49 +02:00 · 2012-06-09 03:05:49 +02:00 · 6da2d94357
parent fe39e36f35
commit 6da2d94357
1 changed files with 10 additions and 8 deletions
--- a/openerp/osv/orm.py
+++ b/openerp/osv/orm.py
@ -2564,13 +2564,15 @@ class BaseModel(object):

        order = orderby or groupby
        data_ids = self.search(cr, uid, [('id', 'in', alldata.keys())], order=order, context=context)
+        
        # the IDS of records that have groupby field value = False or '' should be sorted too
-        data_ids += filter(lambda x:x not in data_ids, alldata.keys())
+        data_ids += set(alldata.keys()).difference(data_ids)    
        data = self.read(cr, uid, data_ids, groupby and [groupby] or ['id'], context=context)
-        # restore order of the search as read() uses the default _order (this is only for groups, so the size of data_read shoud be small):
-        data.sort(lambda x,y: cmp(data_ids.index(x['id']), data_ids.index(y['id'])))
+        # restore order of the search as read() uses the default _order (this is only for groups, so the footprint of data should be small):
+        data_dict = dict((d['id'], d[groupby]) for d in data) 
+        result = [{'id': i, groupby: data_dict[i]} for i in data_ids]

-        for d in data:
+        for d in result:
            if groupby:
                d['__domain'] = [(groupby, '=', alldata[d['id']][groupby] or False)] + domain
                if not isinstance(groupby_list, (str, unicode)):
@ -2589,11 +2591,11 @@ class BaseModel(object):
            del d['id']

        if groupby and groupby in self._group_by_full:
-            data = self._read_group_fill_results(cr, uid, domain, groupby, groupby_list,
-                                                 aggregated_fields, data, read_group_order=order,
-                                                 context=context)
+            result = self._read_group_fill_results(cr, uid, domain, groupby, groupby_list,
+                                                   aggregated_fields, result, read_group_order=order,
+                                                   context=context)

-        return data
+        return result

    def _inherits_join_add(self, current_table, parent_model_name, query):
        """