122 lines
3.8 KiB
Diff
122 lines
3.8 KiB
Diff
From: Nicolas Pitre <nico@cam.org>
|
|
Date: Sat, 30 May 2009 01:55:50 +0000 (-0400)
|
|
Subject: [ARM] alternative copy_to_user: more precise fallback threshold
|
|
X-Git-Url: http://git.marvell.com/?p=orion.git;a=commitdiff_plain;h=c626e3f5ca1d95ad2204d3128c26e7678714eb55
|
|
|
|
[ARM] alternative copy_to_user: more precise fallback threshold
|
|
|
|
Previous size thresholds were guessed from various user space benchmarks
|
|
using a kernel with and without the alternative uaccess option. This
|
|
is however not as precise as a kernel based test to measure the real
|
|
speed of each method.
|
|
|
|
This adds a simple test bench to show the time needed for each method.
|
|
With this, the optimal size threshold for the alternative implementation
|
|
can be determined with more confidence. It appears that the optimal
|
|
threshold for both copy_to_user and clear_user is around 64 bytes. This
|
|
is not a surprise knowing that the memcpy and memset implementations
|
|
need at least 64 bytes to achieve maximum throughput.
|
|
|
|
One might suggest that such a test be used to determine the optimal
|
|
threshold at run time instead, but results are near enough to 64 on
|
|
tested targets concerned by this alternative copy_to_user implementation,
|
|
so adding some overhead associated with a variable threshold is probably
|
|
not worth it for now.
|
|
|
|
Signed-off-by: Nicolas Pitre <nico@marvell.com>
|
|
---
|
|
|
|
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
|
|
index 92838e7..6b967ff 100644
|
|
--- a/arch/arm/lib/uaccess_with_memcpy.c
|
|
+++ b/arch/arm/lib/uaccess_with_memcpy.c
|
|
@@ -106,7 +106,7 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
|
|
* With frame pointer disabled, tail call optimization kicks in
|
|
* as well making this test almost invisible.
|
|
*/
|
|
- if (n < 1024)
|
|
+ if (n < 64)
|
|
return __copy_to_user_std(to, from, n);
|
|
return __copy_to_user_memcpy(to, from, n);
|
|
}
|
|
@@ -151,7 +151,78 @@ out:
|
|
unsigned long __clear_user(void __user *addr, unsigned long n)
|
|
{
|
|
/* See rational for this in __copy_to_user() above. */
|
|
- if (n < 256)
|
|
+ if (n < 64)
|
|
return __clear_user_std(addr, n);
|
|
return __clear_user_memset(addr, n);
|
|
}
|
|
+
|
|
+#if 0
|
|
+
|
|
+/*
|
|
+ * This code is disabled by default, but kept around in case the chosen
|
|
+ * thresholds need to be revalidated. Some overhead (small but still)
|
|
+ * would be implied by a runtime determined variable threshold, and
|
|
+ * so far the measurement on concerned targets didn't show a worthwhile
|
|
+ * variation.
|
|
+ *
|
|
+ * Note that a fairly precise sched_clock() implementation is needed
|
|
+ * for results to make some sense.
|
|
+ */
|
|
+
|
|
+#include <linux/vmalloc.h>
|
|
+
|
|
+static int __init test_size_treshold(void)
|
|
+{
|
|
+ struct page *src_page, *dst_page;
|
|
+ void *user_ptr, *kernel_ptr;
|
|
+ unsigned long long t0, t1, t2;
|
|
+ int size, ret;
|
|
+
|
|
+ ret = -ENOMEM;
|
|
+ src_page = alloc_page(GFP_KERNEL);
|
|
+ if (!src_page)
|
|
+ goto no_src;
|
|
+ dst_page = alloc_page(GFP_KERNEL);
|
|
+ if (!dst_page)
|
|
+ goto no_dst;
|
|
+ kernel_ptr = page_address(src_page);
|
|
+ user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010));
|
|
+ if (!user_ptr)
|
|
+ goto no_vmap;
|
|
+
|
|
+ /* warm up the src page dcache */
|
|
+ ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE);
|
|
+
|
|
+ for (size = PAGE_SIZE; size >= 4; size /= 2) {
|
|
+ t0 = sched_clock();
|
|
+ ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size);
|
|
+ t1 = sched_clock();
|
|
+ ret |= __copy_to_user_std(user_ptr, kernel_ptr, size);
|
|
+ t2 = sched_clock();
|
|
+ printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
|
|
+ }
|
|
+
|
|
+ for (size = PAGE_SIZE; size >= 4; size /= 2) {
|
|
+ t0 = sched_clock();
|
|
+ ret |= __clear_user_memset(user_ptr, size);
|
|
+ t1 = sched_clock();
|
|
+ ret |= __clear_user_std(user_ptr, size);
|
|
+ t2 = sched_clock();
|
|
+ printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
|
|
+ }
|
|
+
|
|
+ if (ret)
|
|
+ ret = -EFAULT;
|
|
+
|
|
+ vunmap(user_ptr);
|
|
+no_vmap:
|
|
+ put_page(dst_page);
|
|
+no_dst:
|
|
+ put_page(src_page);
|
|
+no_src:
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+subsys_initcall(test_size_treshold);
|
|
+
|
|
+#endif
|