[xiph-commits] r14146 - in branches/theora-thusnelda/lib/enc: . x86_32 x86_32_vs x86_64

xiphmont at svn.xiph.org xiphmont at svn.xiph.org
Wed Nov 14 23:45:07 PST 2007


Author: xiphmont
Date: 2007-11-14 23:45:07 -0800 (Wed, 14 Nov 2007)
New Revision: 14146

Modified:
   branches/theora-thusnelda/lib/enc/dsp.h
   branches/theora-thusnelda/lib/enc/reconstruct.c
   branches/theora-thusnelda/lib/enc/x86_32/recon_mmx.c
   branches/theora-thusnelda/lib/enc/x86_32_vs/recon_mmx.c
   branches/theora-thusnelda/lib/enc/x86_64/recon_mmx.c
Log:
asm updates; remove the eliminated functions, add asm for one of the new
(actually mutated) functions.

God I hope it's right.



Modified: branches/theora-thusnelda/lib/enc/dsp.h
===================================================================
--- branches/theora-thusnelda/lib/enc/dsp.h	2007-11-15 07:08:00 UTC (rev 14145)
+++ branches/theora-thusnelda/lib/enc/dsp.h	2007-11-15 07:45:07 UTC (rev 14146)
@@ -50,16 +50,6 @@
   void   (*recon8x8)    (unsigned char *ReconPtr, ogg_int16_t *ChangePtr, 
 			 ogg_uint32_t LineStep);
 
-  void   (*recon_intra8x8)    (unsigned char *ReconPtr, ogg_int16_t *ChangePtr, 
-                     ogg_uint32_t LineStep);
-
-  void   (*recon_inter8x8)    (unsigned char *ReconPtr, unsigned char *RefPtr, 
-                     ogg_int16_t *ChangePtr, ogg_uint32_t LineStep);
-
-  void   (*recon_inter8x8_half)  (unsigned char *ReconPtr, unsigned char *RefPtr1, 
-           unsigned char *RefPtr2, ogg_int16_t *ChangePtr, 
-         ogg_uint32_t LineStep);
-
   void   (*fdct_short)          (ogg_int16_t *InputData, ogg_int16_t *OutputData);
 
   ogg_uint32_t (*row_sad8)  (unsigned char *Src1, unsigned char *Src2);
@@ -138,14 +128,6 @@
 
 #define dsp_recon8x8(funcs,ptr1,ptr2,str1) (funcs.recon8x8 (ptr1,ptr2,str1))
 
-#define dsp_recon_intra8x8(funcs,ptr1,ptr2,str1) (funcs.recon_intra8x8 (ptr1,ptr2,str1))
-
-#define dsp_recon_inter8x8(funcs,ptr1,ptr2,ptr3,str1) \
-  (funcs.recon_inter8x8 (ptr1,ptr2,ptr3,str1))
-
-#define dsp_recon_inter8x8_half(funcs,ptr1,ptr2,ptr3,ptr4,str1) \
-  (funcs.recon_inter8x8_half (ptr1,ptr2,ptr3,ptr4,str1))
-
 #define dsp_fdct_short(funcs,in,out) (funcs.fdct_short (in,out))
 
 #define dsp_row_sad8(funcs,ptr1,ptr2) (funcs.row_sad8 (ptr1,ptr2))

Modified: branches/theora-thusnelda/lib/enc/reconstruct.c
===================================================================
--- branches/theora-thusnelda/lib/enc/reconstruct.c	2007-11-15 07:08:00 UTC (rev 14145)
+++ branches/theora-thusnelda/lib/enc/reconstruct.c	2007-11-15 07:45:07 UTC (rev 14146)
@@ -54,49 +54,6 @@
 }
 
 
-static void recon_intra8x8__c (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
-                               ogg_uint32_t LineStep)
-{
-  ogg_uint32_t i;
-
-  for (i = 8; i; i--){
-    /* Convert the data back to 8 bit unsigned */
-    /* Saturate the output to unsigend 8 bit values */
-    ReconPtr[0] = clamp255( ChangePtr[0] + 128 );
-    ReconPtr[1] = clamp255( ChangePtr[1] + 128 );
-    ReconPtr[2] = clamp255( ChangePtr[2] + 128 );
-    ReconPtr[3] = clamp255( ChangePtr[3] + 128 );
-    ReconPtr[4] = clamp255( ChangePtr[4] + 128 );
-    ReconPtr[5] = clamp255( ChangePtr[5] + 128 );
-    ReconPtr[6] = clamp255( ChangePtr[6] + 128 );
-    ReconPtr[7] = clamp255( ChangePtr[7] + 128 );
-
-    ReconPtr += LineStep;
-    ChangePtr += 8;
-  }
-}
-
-static void recon_inter8x8__c (unsigned char *ReconPtr, unsigned char *RefPtr,
-          ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
-  ogg_uint32_t i;
-
-  for (i = 8; i; i--){
-    ReconPtr[0] = clamp255(RefPtr[0] + ChangePtr[0]);
-    ReconPtr[1] = clamp255(RefPtr[1] + ChangePtr[1]);
-    ReconPtr[2] = clamp255(RefPtr[2] + ChangePtr[2]);
-    ReconPtr[3] = clamp255(RefPtr[3] + ChangePtr[3]);
-    ReconPtr[4] = clamp255(RefPtr[4] + ChangePtr[4]);
-    ReconPtr[5] = clamp255(RefPtr[5] + ChangePtr[5]);
-    ReconPtr[6] = clamp255(RefPtr[6] + ChangePtr[6]);
-    ReconPtr[7] = clamp255(RefPtr[7] + ChangePtr[7]);
-
-    ChangePtr += 8;
-    ReconPtr += LineStep;
-    RefPtr += LineStep;
-  }
-}
-
 static void recon8x8__c (unsigned char *ReconPtr, 
 			 ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
 {
@@ -117,37 +74,11 @@
   }
 }
 
-static void recon_inter8x8_half__c (unsigned char *ReconPtr, unsigned char *RefPtr1,
-               unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
-         ogg_uint32_t LineStep)
-{
-  ogg_uint32_t  i;
-
-  for (i = 8; i; i--){
-    ReconPtr[0] = clamp255((((int)RefPtr1[0] + (int)RefPtr2[0]) >> 1) + ChangePtr[0] );
-    ReconPtr[1] = clamp255((((int)RefPtr1[1] + (int)RefPtr2[1]) >> 1) + ChangePtr[1] );
-    ReconPtr[2] = clamp255((((int)RefPtr1[2] + (int)RefPtr2[2]) >> 1) + ChangePtr[2] );
-    ReconPtr[3] = clamp255((((int)RefPtr1[3] + (int)RefPtr2[3]) >> 1) + ChangePtr[3] );
-    ReconPtr[4] = clamp255((((int)RefPtr1[4] + (int)RefPtr2[4]) >> 1) + ChangePtr[4] );
-    ReconPtr[5] = clamp255((((int)RefPtr1[5] + (int)RefPtr2[5]) >> 1) + ChangePtr[5] );
-    ReconPtr[6] = clamp255((((int)RefPtr1[6] + (int)RefPtr2[6]) >> 1) + ChangePtr[6] );
-    ReconPtr[7] = clamp255((((int)RefPtr1[7] + (int)RefPtr2[7]) >> 1) + ChangePtr[7] );
-
-    ChangePtr += 8;
-    ReconPtr += LineStep;
-    RefPtr1 += LineStep;
-    RefPtr2 += LineStep;
-  }
-}
-
 void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
 {
   funcs->copy8x8 = copy8x8__c;
   funcs->copy8x8_half = copy8x8_half__c;
   funcs->recon8x8 = recon8x8__c;
-  funcs->recon_intra8x8 = recon_intra8x8__c;
-  funcs->recon_inter8x8 = recon_inter8x8__c;
-  funcs->recon_inter8x8_half = recon_inter8x8_half__c;
 #if defined(USE_ASM)
   if (cpu_flags & OC_CPU_X86_MMX) {
     dsp_mmx_recon_init(funcs);

Modified: branches/theora-thusnelda/lib/enc/x86_32/recon_mmx.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86_32/recon_mmx.c	2007-11-15 07:08:00 UTC (rev 14145)
+++ branches/theora-thusnelda/lib/enc/x86_32/recon_mmx.c	2007-11-15 07:45:07 UTC (rev 14146)
@@ -60,47 +60,18 @@
   );
 }
 
-static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
-		      ogg_uint32_t LineStep)
+static void recon8x8__mmx (unsigned char *ReconPtr,
+			   ogg_int16_t *ChangePtr, 
+			   ogg_uint32_t LineStep)
 {
   __asm__ __volatile__ (
     "  .p2align 4                      \n\t"
 
-    "  movq        %[V128], %%mm0      \n\t" /* Set mm0 to 0x8080808080808080 */
-
-    "  lea         128(%1), %%edi      \n\t" /* Endpoint in input buffer */
-    "1:                                \n\t" 
-    "  movq         (%1), %%mm2        \n\t" /* First four input values */
-
-    "  packsswb    8(%1), %%mm2        \n\t" /* pack with next(high) four values */
-    "  por         %%mm0, %%mm0        \n\t" 
-    "  pxor        %%mm0, %%mm2        \n\t" /* Convert result to unsigned (same as add 128) */
-    "  lea         16(%1), %1          \n\t" /* Step source buffer */
-    "  cmp         %%edi, %1           \n\t" /* are we done */
-
-    "  movq        %%mm2, (%0)         \n\t" /* store results */
-
-    "  lea         (%0, %2), %0        \n\t" /* Step output buffer */
-    "  jc          1b                  \n\t" /* Loop back if we are not done */
-      : "+r" (ReconPtr)
-      : "r" (ChangePtr),
-        "r" (LineStep),
-        [V128] "m" (V128)
-      : "memory", "edi"
-  );
-}
-
-static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
-		      ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
-  __asm__ __volatile__ (
-    "  .p2align 4                      \n\t"
-
     "  pxor        %%mm0, %%mm0        \n\t"
     "  lea         128(%1), %%edi      \n\t"
 
     "1:                                \n\t"
-    "  movq        (%2), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
+    "  movq        (%0), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
 
     "  movq        (%1), %%mm4         \n\t" /* first 4 changes */
     "  movq        %%mm2, %%mm3        \n\t"
@@ -109,7 +80,6 @@
     "  paddsw      %%mm4, %%mm2        \n\t" /* add in first 4 changes */
     "  punpckhbw   %%mm0, %%mm3        \n\t" /* turn last 4 refs into positive 16-bit #s */
     "  paddsw      %%mm5, %%mm3        \n\t" /* add in last 4 changes */
-    "  add         %3, %2              \n\t" /* next row of reference pixels */
     "  packuswb    %%mm3, %%mm2        \n\t" /* pack result to unsigned 8-bit values */
     "  lea         16(%1), %1          \n\t" /* next row of changes */
     "  cmp         %%edi, %1            \n\t" /* are we done? */
@@ -120,64 +90,16 @@
     "  jc          1b                  \n\t"
       : "+r" (ReconPtr)
       : "r" (ChangePtr),
-        "r" (RefPtr),
-        "r" (LineStep)
+        "r" (LineStep),
       : "memory", "edi"
   );
 }
 
-static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
-		           unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
-			   ogg_uint32_t LineStep)
-{
-  __asm__ __volatile__ (
-    "  .p2align 4                      \n\t"
-
-    "  pxor        %%mm0, %%mm0        \n\t"
-    "  lea         128(%1), %%edi      \n\t"
-
-    "1:                                \n\t"
-    "  movq        (%2), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
-    "  movq        (%3), %%mm4         \n\t" /* (+3 misaligned) 8 reference pixels */
-
-    "  movq        %%mm2, %%mm3        \n\t"
-    "  punpcklbw   %%mm0, %%mm2        \n\t" /* mm2 = start ref1 as positive 16-bit #s */
-    "  movq        %%mm4, %%mm5        \n\t"
-    "  movq        (%1), %%mm6         \n\t" /* first 4 changes */
-    "  punpckhbw   %%mm0, %%mm3        \n\t" /* mm3 = end ref1 as positive 16-bit #s */
-    "  movq        8(%1), %%mm7        \n\t" /* last 4 changes */
-    "  punpcklbw   %%mm0, %%mm4        \n\t" /* mm4 = start ref2 as positive 16-bit #s */
-    "  punpckhbw   %%mm0, %%mm5        \n\t" /* mm5 = end ref2 as positive 16-bit #s */
-    "  paddw       %%mm4, %%mm2        \n\t" /* mm2 = start (ref1 + ref2) */
-    "  paddw       %%mm5, %%mm3        \n\t" /* mm3 = end (ref1 + ref2) */
-    "  psrlw       $1, %%mm2           \n\t" /* mm2 = start (ref1 + ref2)/2 */
-    "  psrlw       $1, %%mm3           \n\t" /* mm3 = end (ref1 + ref2)/2 */
-    "  paddw       %%mm6, %%mm2        \n\t" /* add changes to start */
-    "  paddw       %%mm7, %%mm3        \n\t" /* add changes to end */
-    "  lea         16(%1), %1          \n\t" /* next row of changes */
-    "  packuswb    %%mm3, %%mm2        \n\t" /* pack start|end to unsigned 8-bit */
-    "  add         %4, %2              \n\t" /* next row of reference pixels */
-    "  add         %4, %3              \n\t" /* next row of reference pixels */
-    "  movq        %%mm2, (%0)         \n\t" /* store result */
-    "  add         %4, %0              \n\t" /* next row of output */
-    "  cmp         %%edi, %1           \n\t" /* are we done? */
-    "  jc          1b                  \n\t"
-      : "+r" (ReconPtr)
-      : "r" (ChangePtr),
-        "r" (RefPtr1),
-        "r" (RefPtr2),
-        "m" (LineStep)
-      : "memory", "edi"
-  );
-}
-
 void dsp_mmx_recon_init(DspFunctions *funcs)
 {
   TH_DEBUG("enabling accelerated x86_32 mmx recon functions.\n");
   funcs->copy8x8 = copy8x8__mmx;
-  funcs->recon_intra8x8 = recon_intra8x8__mmx;
-  funcs->recon_inter8x8 = recon_inter8x8__mmx;
-  funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
+  funcs->recon8x8 = recon8x8__mmx;
 }
 
 #endif /* USE_ASM */

Modified: branches/theora-thusnelda/lib/enc/x86_32_vs/recon_mmx.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86_32_vs/recon_mmx.c	2007-11-15 07:08:00 UTC (rev 14145)
+++ branches/theora-thusnelda/lib/enc/x86_32_vs/recon_mmx.c	2007-11-15 07:45:07 UTC (rev 14146)
@@ -57,72 +57,32 @@
 
 }
 
-static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
-		      ogg_uint32_t LineStep)
+static void recon8x8__mmx (unsigned char *ReconPtr, 
+			   ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
 {
 
     __asm {
-        align 16
 
-        mov         eax, ReconPtr
-        mov         ebx, ChangePtr
-        mov         ecx, LineStep
-
-        movq		mm0, V128
-
-        lea		    edi, [128 + ebx]
-    loop_start:	
-        movq		mm2, [ebx]
-
-        packsswb	mm2, [8 + ebx]
-        por		    mm0, mm0
-        pxor		mm2, mm0
-        lea		    ebx, [16 + ebx]
-        cmp		    ebx, edi
-
-        movq		[eax], mm2
-
-
-
-        lea		    eax, [eax + ecx]
-        jc		    loop_start
-
-
-    };
-    
-}
-
-
-
-
-
-static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
-		      ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
-
-    __asm {
-
         align 16
 
         mov         eax, ReconPtr
         mov         ebx, ChangePtr
         mov         ecx, LineStep
-        mov         edx, RefPtr
     
         pxor		mm0, mm0
         lea		    edi, [128 + ebx]
 
     loop_start:
-        movq		mm2, [edx]
+        movq		mm2, [eax]
 
         movq		mm4, [ebx]
         movq		mm3, mm2
         movq		mm5, [8 + ebx]
+
         punpcklbw	mm2, mm0
         paddsw		mm2, mm4
         punpckhbw	mm3, mm0
         paddsw		mm3, mm5
-        add		    edx, ecx
         packuswb	mm2, mm3
         lea		    ebx, [16 + ebx]
         cmp		    ebx, edi
@@ -135,64 +95,10 @@
     };
 }
 
-
-
-
-static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
-		           unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
-			   ogg_uint32_t LineStep)
-{
-    __asm {
-        align 16
-
-        mov     eax, ReconPtr
-        mov     ebx, ChangePtr
-        mov     ecx, RefPtr1
-        mov     edx, RefPtr2
-                
-        pxor		mm0, mm0
-        lea		edi, [128 + ebx]
-
-    loop_start:
-        movq		mm2, [ecx]
-        movq		mm4, [edx]
-
-        movq		mm3, mm2
-        punpcklbw		mm2, mm0
-        movq		mm5, mm4
-        movq		mm6, [ebx]
-        punpckhbw		mm3, mm0
-        movq		mm7, [8 + ebx]
-        punpcklbw		mm4, mm0
-        punpckhbw		mm5, mm0
-        paddw		mm2, mm4
-        paddw		mm3, mm5
-        psrlw		mm2, 1
-        psrlw		mm3, 1
-        paddw		mm2, mm6
-        paddw		mm3, mm7
-        lea		ebx, [16 + ebx]
-        packuswb		mm2, mm3
-        add		ecx, LineStep
-        add		edx, LineStep
-        movq		[eax], mm2
-        add		eax, LineStep
-        cmp		ebx, edi
-        jc		loop_start
-
-    };
-
-}
-
-
-
-
 void dsp_mmx_recon_init(DspFunctions *funcs)
 {
   TH_DEBUG("enabling accelerated x86_32 mmx recon functions.\n");
   funcs->copy8x8 = copy8x8__mmx;
-  funcs->recon_intra8x8 = recon_intra8x8__mmx;
-  funcs->recon_inter8x8 = recon_inter8x8__mmx;
-  funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
+  funcs->recon8x8 = recon8x8__mmx;
 }
 

Modified: branches/theora-thusnelda/lib/enc/x86_64/recon_mmx.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86_64/recon_mmx.c	2007-11-15 07:08:00 UTC (rev 14145)
+++ branches/theora-thusnelda/lib/enc/x86_64/recon_mmx.c	2007-11-15 07:45:07 UTC (rev 14146)
@@ -62,47 +62,18 @@
   );
 }
 
-static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
-                                 ogg_uint32_t LineStep)
+static void recon8x8__mmx (unsigned char *ReconPtr, 
+			   ogg_int16_t *ChangePtr, 
+			   ogg_uint32_t LineStep)
 {
   __asm__ __volatile__ (
     "  .balign 16                      \n\t"
 
-    "  movq        %[V128], %%mm0      \n\t" /* Set mm0 to 0x8080808080808080 */
-
-    "  lea         128(%1), %%rdi      \n\t" /* Endpoint in input buffer */
-    "1:                                \n\t" 
-    "  movq         (%1), %%mm2        \n\t" /* First four input values */
-
-    "  packsswb    8(%1), %%mm2        \n\t" /* pack with next(high) four values */
-    "  por         %%mm0, %%mm0        \n\t" 
-    "  pxor        %%mm0, %%mm2        \n\t" /* Convert result to unsigned (same as add 128) */
-    "  lea         16(%1), %1          \n\t" /* Step source buffer */
-    "  cmp         %%rdi, %1           \n\t" /* are we done */
-
-    "  movq        %%mm2, (%0)         \n\t" /* store results */
-
-    "  lea         (%0, %2), %0        \n\t" /* Step output buffer */
-    "  jc          1b                  \n\t" /* Loop back if we are not done */
-      : "+r" (ReconPtr)
-      : "r" (ChangePtr),
-        "r" ((ogg_uint64_t)LineStep),
-        [V128] "m" (V128)
-      : "memory", "rdi"
-  );
-}
-
-static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
-                                 ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
-  __asm__ __volatile__ (
-    "  .balign 16                      \n\t"
-
     "  pxor        %%mm0, %%mm0        \n\t"
     "  lea         128(%1), %%rdi      \n\t"
 
     "1:                                \n\t"
-    "  movq        (%2), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
+    "  movq        (%0), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
 
     "  movq        (%1), %%mm4         \n\t" /* first 4 changes */
     "  movq        %%mm2, %%mm3        \n\t"
@@ -111,75 +82,27 @@
     "  paddsw      %%mm4, %%mm2        \n\t" /* add in first 4 changes */
     "  punpckhbw   %%mm0, %%mm3        \n\t" /* turn last 4 refs into positive 16-bit #s */
     "  paddsw      %%mm5, %%mm3        \n\t" /* add in last 4 changes */
-    "  add         %3, %2              \n\t" /* next row of reference pixels */
+
     "  packuswb    %%mm3, %%mm2        \n\t" /* pack result to unsigned 8-bit values */
     "  lea         16(%1), %1          \n\t" /* next row of changes */
     "  cmp         %%rdi, %1           \n\t" /* are we done? */
 
     "  movq        %%mm2, (%0)         \n\t" /* store result */
 
-    "  lea         (%0, %3), %0        \n\t" /* next row of output */
+    "  lea         (%0, %2), %0        \n\t" /* next row of output */
     "  jc          1b                  \n\t"
       : "+r" (ReconPtr)
       : "r" (ChangePtr),
-        "r" (RefPtr),
         "r" ((ogg_uint64_t)LineStep)
       : "memory", "rdi"
   );
 }
 
-static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
-                                      unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
-                                      ogg_uint32_t LineStep)
-{
-  __asm__ __volatile__ (
-    "  .balign 16                      \n\t"
-
-    "  pxor        %%mm0, %%mm0        \n\t"
-    "  lea         128(%1), %%rdi      \n\t"
-
-    "1:                                \n\t"
-    "  movq        (%2), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
-    "  movq        (%3), %%mm4         \n\t" /* (+3 misaligned) 8 reference pixels */
-
-    "  movq        %%mm2, %%mm3        \n\t"
-    "  punpcklbw   %%mm0, %%mm2        \n\t" /* mm2 = start ref1 as positive 16-bit #s */
-    "  movq        %%mm4, %%mm5        \n\t"
-    "  movq        (%1), %%mm6         \n\t" /* first 4 changes */
-    "  punpckhbw   %%mm0, %%mm3        \n\t" /* mm3 = end ref1 as positive 16-bit #s */
-    "  movq        8(%1), %%mm7        \n\t" /* last 4 changes */
-    "  punpcklbw   %%mm0, %%mm4        \n\t" /* mm4 = start ref2 as positive 16-bit #s */
-    "  punpckhbw   %%mm0, %%mm5        \n\t" /* mm5 = end ref2 as positive 16-bit #s */
-    "  paddw       %%mm4, %%mm2        \n\t" /* mm2 = start (ref1 + ref2) */
-    "  paddw       %%mm5, %%mm3        \n\t" /* mm3 = end (ref1 + ref2) */
-    "  psrlw       $1, %%mm2           \n\t" /* mm2 = start (ref1 + ref2)/2 */
-    "  psrlw       $1, %%mm3           \n\t" /* mm3 = end (ref1 + ref2)/2 */
-    "  paddw       %%mm6, %%mm2        \n\t" /* add changes to start */
-    "  paddw       %%mm7, %%mm3        \n\t" /* add changes to end */
-    "  lea         16(%1), %1          \n\t" /* next row of changes */
-    "  packuswb    %%mm3, %%mm2        \n\t" /* pack start|end to unsigned 8-bit */
-    "  add         %4, %2              \n\t" /* next row of reference pixels */
-    "  add         %4, %3              \n\t" /* next row of reference pixels */
-    "  movq        %%mm2, (%0)         \n\t" /* store result */
-    "  add         %4, %0              \n\t" /* next row of output */
-    "  cmp         %%rdi, %1           \n\t" /* are we done? */
-    "  jc          1b                  \n\t"
-      : "+r" (ReconPtr)
-      : "r" (ChangePtr),
-        "r" (RefPtr1),
-        "r" (RefPtr2),
-        "r" ((ogg_uint64_t)LineStep)
-      : "memory", "rdi"
-  );
-}
-
 void dsp_mmx_recon_init(DspFunctions *funcs)
 {
   TH_DEBUG("enabling accelerated x86_64 mmx recon functions.\n");
   funcs->copy8x8 = copy8x8__mmx;
-  funcs->recon_intra8x8 = recon_intra8x8__mmx;
-  funcs->recon_inter8x8 = recon_inter8x8__mmx;
-  funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
+  funcs->recon8x8 = recon8x8__mmx;
 }
 
 #endif /* USE_ASM */



More information about the commits mailing list