generic-poky/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
Richard Purdie 29d6678fd5 Major layout change to the packages directory
Having one monolithic packages directory makes it hard to find things
and is generally overwhelming. This commit splits it into several
logical sections roughly based on function, recipes.txt gives more
information about the classifications used.

The opportunity is also used to switch from "packages" to "recipes"
as used in OpenEmbedded as the term "packages" can be confusing to
people and has many different meanings.

Not all recipes have been classified yet, this is just a first pass
at separating things out. Some packages are moved to meta-extras as
they're no longer actively used or maintained.

Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
2010-08-27 15:29:45 +01:00

147 lines
5.2 KiB
Diff

--- /tmp/image-format-conversions.h 2009-02-03 10:18:04.000000000 +0100
+++ git/src/image-format-conversions.h 2009-02-03 10:19:18.000000000 +0100
@@ -30,6 +30,8 @@
/* Basic C implementation of YV12/I420 to UYVY conversion */
void uv12_to_uyvy(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
+/* NEON implementation of YV12/I420 to UYVY conversion */
+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
#endif /* __IMAGE_FORMAT_CONVERSIONS_H__ */
--- /tmp/image-format-conversions.c 2009-02-03 10:18:04.000000000 +0100
+++ git/src/image-format-conversions.c 2009-02-03 10:16:47.000000000 +0100
@@ -2,6 +2,7 @@
* Copyright 2008 Kalle Vahlman, <zuh@iki.fi>
* Ilpo Ruotsalainen, <lonewolf@iki.fi>
* Tuomas Kulve, <tuomas.kulve@movial.com>
+ * Ian Rickards, <ian.rickards@arm.com>
*
*
* Permission to use, copy, modify, distribute and sell this software and its
@@ -89,3 +90,104 @@
}
}
+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest)
+{
+ int x, y;
+ uint8_t *dest_even = dest;
+ uint8_t *dest_odd = dest + w * 2; /* NOTE(review): output rows are taken to be exactly w * 2 bytes apart - confirm against callers */
+ uint8_t *y_p_even = y_p;
+ uint8_t *y_p_odd = y_p + y_pitch;
+ /* Converts planar Y, U, V input into packed U,Y,V,Y output, two rows per pass; each chroma row feeds both luma rows. */
+ /*ErrorF("in uv12_to_uyvy, w: %d, pitch: %d\n", w, pitch);*/
+ if (w<16) /* narrower than one 16-pixel NEON block: plain C fallback */
+ {
+ for (y=0; y<h; y+=2) /* one pass per pair of rows; NOTE(review): appears to assume h is even */
+ {
+ for (x=0; x<w; x+=2) /* two pixels per step; NOTE(review): appears to assume w is even */
+ {
+ /* Output two 2x1 macroblocks to form a 2x2 block from input */
+ uint8_t u_val = *u_p++;
+ uint8_t v_val = *v_p++;
+
+ /* Even row, first pixel */
+ *dest_even++ = u_val;
+ *dest_even++ = *y_p_even++;
+
+ /* Even row, second pixel */
+ *dest_even++ = v_val;
+ *dest_even++ = *y_p_even++;
+
+ /* Odd row, first pixel */
+ *dest_odd++ = u_val;
+ *dest_odd++ = *y_p_odd++;
+
+ /* Odd row, second pixel */
+ *dest_odd++ = v_val;
+ *dest_odd++ = *y_p_odd++;
+ }
+
+ dest_even += w * 2; /* step over the row that was just written through dest_odd */
+ dest_odd += w * 2; /* step over the next row, which dest_even fills on the next pass */
+
+ u_p += ((uv_pitch << 1) - w) >> 1; /* move to the next chroma row: uv_pitch - w/2 when w is even */
+ v_p += ((uv_pitch << 1) - w) >> 1; /* same adjustment for the V plane */
+
+ y_p_even += (y_pitch - w) + y_pitch; /* finish this luma row, then skip the row read through y_p_odd */
+ y_p_odd += (y_pitch - w) + y_pitch; /* finish this luma row, then skip the row y_p_even reads next */
+ }
+ }
+ else
+ {
+ for (y=0; y<h; y+=2) /* one pass per pair of rows; NOTE(review): appears to assume h is even */
+ {
+ x=w; /* pixels still to convert in this pair of rows */
+ do {
+ // avoid using d8-d15 (q4-q7) aapcs callee-save registers
+ asm volatile (
+ "1:\n\t"
+ "vld1.u8 {d0}, [%[u_p]]!\n\t" // load 8 U bytes, post-increment u_p
+ "sub %[x],%[x],#16\n\t" // each pass of this loop covers 16 pixels
+ "cmp %[x],#16\n\t" // sets the flags tested by the bhs at the end of the loop
+ "vld1.u8 {d1}, [%[v_p]]!\n\t" // load 8 V bytes, post-increment v_p
+ "vld1.u8 {q1}, [%[y_p_even]]!\n\t" // load 16 Y bytes of the even row
+ "vzip.u8 d0, d1\n\t" // interleave the U and V bytes across d0/d1 (q0)
+ "vld1.u8 {q2}, [%[y_p_odd]]!\n\t" // load 16 Y bytes of the odd row
+ // use 2-element struct stores to zip up y with u&v
+ "vst2.u8 {q0,q1}, [%[dest_even]]!\n\t" // write 32 packed bytes to the even output row
+ "vmov.u8 q1, q2\n\t" // move odd-row Y into q1 so the same store form is reused
+ "vst2.u8 {q0,q1}, [%[dest_odd]]!\n\t" // write 32 packed bytes to the odd output row
+ "bhs 1b\n\t" // repeat while at least 16 pixels remain
+ : [u_p] "+r" (u_p), [v_p] "+r" (v_p), [y_p_even] "+r" (y_p_even), [y_p_odd] "+r" (y_p_odd),
+ [dest_even] "+r" (dest_even), [dest_odd] "+r" (dest_odd),
+ [x] "+r" (x)
+ :
+ : "cc", "memory", "d0","d1","d2","d3","d4","d5"
+ );
+ if (x!=0) /* fewer than 16 pixels are left over */
+ {
+ // overlap final 16-pixel block to process requested width exactly
+ x = 16-x; /* distance to step back, in pixels */
+ u_p -= x/2; /* chroma planes hold one byte per two pixels */
+ v_p -= x/2;
+ y_p_even -= x; /* luma planes hold one byte per pixel */
+ y_p_odd -= x;
+ dest_even -= x*2; /* packed output holds two bytes per pixel */
+ dest_odd -= x*2;
+ x = 16; /* exactly one more block, after which x reaches 0 */
+ // do another 16-pixel block
+ }
+ }
+ while (x!=0);
+
+ dest_even += w * 2; /* step over the row that was just written through dest_odd */
+ dest_odd += w * 2; /* step over the next row, which dest_even fills on the next pass */
+
+ u_p += ((uv_pitch << 1) - w) >> 1; /* move to the next chroma row: uv_pitch - w/2 when w is even */
+ v_p += ((uv_pitch << 1) - w) >> 1; /* same adjustment for the V plane */
+
+ y_p_even += (y_pitch - w) + y_pitch; /* finish this luma row, then skip the row read through y_p_odd */
+ y_p_odd += (y_pitch - w) + y_pitch; /* finish this luma row, then skip the row y_p_even reads next */
+ }
+ }
+}
+
--- /tmp/omapfb-xv-generic.c 2009-02-03 10:52:18.000000000 +0100
+++ git/src/omapfb-xv-generic.c 2009-02-03 10:52:24.000000000 +0100
@@ -240,7 +240,7 @@
uint8_t *yb = buf;
uint8_t *ub = yb + (src_y_pitch * src_h);
uint8_t *vb = ub + (src_uv_pitch * (src_h / 2));
- uv12_to_uyvy(src_w & ~15,
+ uv12_to_uyvy_neon(src_w & ~15,
src_h & ~15,
src_y_pitch,
src_uv_pitch,
@@ -256,7 +256,7 @@
uint8_t *yb = buf;
uint8_t *vb = yb + (src_y_pitch * src_h);
uint8_t *ub = vb + (src_uv_pitch * (src_h / 2));
- uv12_to_uyvy(src_w & ~15,
+ uv12_to_uyvy_neon(src_w & ~15,
src_h & ~15,
src_y_pitch,
src_uv_pitch,