[xiph-commits] r11340 - branches/theora-mmx/lib/x86_64
msmith at svn.xiph.org
msmith at svn.xiph.org
Thu May 4 02:05:32 PDT 2006
Author: msmith
Date: 2006-05-04 02:05:27 -0700 (Thu, 04 May 2006)
New Revision: 11340
Modified:
branches/theora-mmx/lib/x86_64/dsp_mmx.c
branches/theora-mmx/lib/x86_64/fdct_mmx.c
branches/theora-mmx/lib/x86_64/recon_mmx.c
Log:
Patch from Dan Lenski to make the x86-64 MMX code position-independent (PIC),
so that it works correctly when loaded as a shared library.
Modified: branches/theora-mmx/lib/x86_64/dsp_mmx.c
===================================================================
--- branches/theora-mmx/lib/x86_64/dsp_mmx.c 2006-05-04 01:33:04 UTC (rev 11339)
+++ branches/theora-mmx/lib/x86_64/dsp_mmx.c 2006-05-04 09:05:27 UTC (rev 11340)
@@ -20,13 +20,6 @@
static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x0080008000800080LL;
-#if defined(__MINGW32__) || defined(__CYGWIN__) || \
- defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__))
-# define M(a) "_" #a
-#else
-# define M(a) #a
-#endif
-
#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
@@ -77,7 +70,7 @@
" .balign 16 \n\t"
" pxor %%mm7, %%mm7 \n\t"
- " movq "M(V128)", %%mm1 \n\t"
+ " movq %[V128], %%mm1 \n\t"
".rept 8 \n\t"
" movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */
@@ -97,7 +90,8 @@
: "+r" (FiltPtr),
"+r" (DctInputPtr)
- : "m" (PixelsPerLine)
+ : "m" (PixelsPerLine),
+ [V128] "m" (V128)
: "memory"
);
}
Modified: branches/theora-mmx/lib/x86_64/fdct_mmx.c
===================================================================
--- branches/theora-mmx/lib/x86_64/fdct_mmx.c 2006-05-04 01:33:04 UTC (rev 11339)
+++ branches/theora-mmx/lib/x86_64/fdct_mmx.c 2006-05-04 09:05:27 UTC (rev 11340)
@@ -86,7 +86,7 @@
" movq %%mm2, %%mm0 \n\t" /* make a copy */ \
" paddsw %%mm2, %%mm3 \n\t" /* mm3 = is0734 + is1256 */ \
\
- " pmulhw "M(xC4S4)", %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ \
+ " pmulhw %[xC4S4], %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ \
" paddw %%mm2, %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) */ \
" psrlw $15, %%mm2 \n\t" \
" paddw %%mm2, %%mm0 \n\t" /* Truncate mm0, now it is op[4] */ \
@@ -95,7 +95,7 @@
" movq %%mm0," #ip4 " \n\t" /* save ip4, now mm0,mm2 are free */ \
\
" movq %%mm3, %%mm0 \n\t" \
- " pmulhw "M(xC4S4)", %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ \
+ " pmulhw %[xC4S4], %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ \
\
" psrlw $15, %%mm2 \n\t" \
" paddw %%mm0, %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) */ \
@@ -104,7 +104,7 @@
" movq %%mm3," #ip0 " \n\t" \
/* ------------------------------------------------------------------- */ \
" movq " #temp ", %%mm3 \n\t" /* mm3 = irot_input_y */ \
- " pmulhw "M(xC2S6)", %%mm3 \n\t" /* mm3 = xC2S6 * irot_input_y - irot_input_y */ \
+ " pmulhw %[xC2S6], %%mm3 \n\t" /* mm3 = xC2S6 * irot_input_y - irot_input_y */ \
\
" movq " #temp ", %%mm2 \n\t" \
" movq %%mm2, %%mm0 \n\t" \
@@ -116,7 +116,7 @@
" movq %%mm5, %%mm0 \n\t" \
\
" movq %%mm5, %%mm2 \n\t" \
- " pmulhw "M(xC6S2)", %%mm0 \n\t" /* mm0 = xC6S2 * irot_input_x */ \
+ " pmulhw %[xC6S2], %%mm0 \n\t" /* mm0 = xC6S2 * irot_input_x */ \
\
" psrlw $15, %%mm2 \n\t" \
" paddw %%mm2, %%mm0 \n\t" /* Truncated */ \
@@ -127,7 +127,7 @@
" movq %%mm5, %%mm0 \n\t" \
" movq %%mm5, %%mm2 \n\t" \
\
- " pmulhw "M(xC2S6)", %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x - irot_input_x */ \
+ " pmulhw %[xC2S6], %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x - irot_input_x */ \
" psrlw $15, %%mm2 \n\t" \
\
" movq " #temp ", %%mm3 \n\t" \
@@ -136,7 +136,7 @@
" paddw %%mm2, %%mm5 \n\t" /* Truncated */ \
" movq %%mm3, %%mm2 \n\t" \
\
- " pmulhw "M(xC6S2)", %%mm3 \n\t" /* mm3 = xC6S2 * irot_input_y */ \
+ " pmulhw %[xC6S2], %%mm3 \n\t" /* mm3 = xC6S2 * irot_input_y */ \
" psrlw $15, %%mm2 \n\t" \
\
" paddw %%mm2, %%mm3 \n\t" /* Truncated */ \
@@ -144,7 +144,7 @@
\
" movq %%mm3," #ip6 " \n\t" \
/* ------------------------------------------------------------------- */ \
- " movq "M(xC4S4)", %%mm0 \n\t" \
+ " movq %[xC4S4], %%mm0 \n\t" \
" movq %%mm1, %%mm2 \n\t" \
" movq %%mm1, %%mm3 \n\t" \
\
@@ -174,13 +174,13 @@
" paddsw %%mm1, %%mm1 \n\t" \
" paddsw %%mm4, %%mm1 \n\t" /* mm1 = id07 + icommon_product1 */ \
/* ------------------------------------------------------------------- */ \
- " movq "M(xC1S7)", %%mm7 \n\t" \
+ " movq %[xC1S7], %%mm7 \n\t" \
" movq %%mm1, %%mm2 \n\t" \
\
" movq %%mm1, %%mm3 \n\t" \
" pmulhw %%mm7, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x - irot_input_x */ \
\
- " movq "M(xC7S1)", %%mm7 \n\t" \
+ " movq %[xC7S1], %%mm7 \n\t" \
" psrlw $15, %%mm2 \n\t" \
\
" paddw %%mm3, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x */ \
@@ -192,10 +192,10 @@
" movq %%mm0, %%mm5 \n\t" \
" movq %%mm0, %%mm2 \n\t" \
\
- " movq "M(xC1S7)", %%mm7 \n\t" \
+ " movq %[xC1S7], %%mm7 \n\t" \
" pmulhw %%mm7, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y - irot_input_y */ \
\
- " movq "M(xC7S1)", %%mm7 \n\t" \
+ " movq %[xC7S1], %%mm7 \n\t" \
" psrlw $15, %%mm2 \n\t" \
\
" paddw %%mm5, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y */ \
@@ -210,8 +210,8 @@
" movq %%mm1," #ip1 " \n\t" \
" movq %%mm3," #ip7 " \n\t" \
/* ------------------------------------------------------------------- */ \
- " movq "M(xC3S5)", %%mm0 \n\t" \
- " movq "M(xC5S3)", %%mm1 \n\t" \
+ " movq %[xC3S5], %%mm0 \n\t" \
+ " movq %[xC5S3], %%mm1 \n\t" \
\
" movq %%mm6, %%mm5 \n\t" \
" movq %%mm6, %%mm7 \n\t" \
@@ -329,7 +329,14 @@
: "+r" (InputData),
"+r" (OutputData)
- : "r" (temp)
+ : "r" (temp),
+ [xC1S7] "m" (xC1S7), /* gcc 3.1+ allows named asm parameters */
+ [xC2S6] "m" (xC2S6),
+ [xC3S5] "m" (xC3S5),
+ [xC4S4] "m" (xC4S4),
+ [xC5S3] "m" (xC5S3),
+ [xC6S2] "m" (xC6S2),
+ [xC7S1] "m" (xC7S1)
: "memory"
);
}
Modified: branches/theora-mmx/lib/x86_64/recon_mmx.c
===================================================================
--- branches/theora-mmx/lib/x86_64/recon_mmx.c 2006-05-04 01:33:04 UTC (rev 11339)
+++ branches/theora-mmx/lib/x86_64/recon_mmx.c 2006-05-04 09:05:27 UTC (rev 11340)
@@ -19,13 +19,6 @@
static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x8080808080808080LL;
-#if defined(__MINGW32__) || defined(__CYGWIN__) || \
- defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__))
-# define M(a) "_" #a
-#else
-# define M(a) #a
-#endif
-
static void copy8x8__mmx (unsigned char *src,
unsigned char *dest,
ogg_uint64_t stride)
@@ -71,7 +64,7 @@
__asm__ __volatile__ (
" .balign 16 \n\t"
- " movq "M(V128)", %%mm0 \n\t" /* Set mm0 to 0x8080808080808080 */
+ " movq %[V128], %%mm0 \n\t" /* Set mm0 to 0x8080808080808080 */
" lea 128(%1), %%rdi \n\t" /* Endpoint in input buffer */
"1: \n\t"
@@ -89,7 +82,8 @@
" jc 1b \n\t" /* Loop back if we are not done */
: "+r" (ReconPtr)
: "r" (ChangePtr),
- "r" (LineStep)
+ "r" (LineStep),
+ [V128] "m" (V128)
: "memory", "rdi"
);
}
More information about the commits mailing list.