[xiph-commits] r14835 - trunk/theora/lib/dec/x86

tterribe at svn.xiph.org tterribe at svn.xiph.org
Sun May 4 10:37:16 PDT 2008


Author: tterribe
Date: 2008-05-04 10:37:16 -0700 (Sun, 04 May 2008)
New Revision: 14835

Modified:
   trunk/theora/lib/dec/x86/mmxfrag.c
   trunk/theora/lib/dec/x86/mmxstate.c
Log:
Replace "long" with "ptrdiff_t" in the x86 asm, since win64 is apparently the
 only platform in existence where sizeof(long) < sizeof(void *).
Yes, I knew it was technically possible, but I thought at least on x86 we could
 assume such things did not happen; now I know better.


Modified: trunk/theora/lib/dec/x86/mmxfrag.c
===================================================================
--- trunk/theora/lib/dec/x86/mmxfrag.c	2008-05-04 14:03:21 UTC (rev 14834)
+++ trunk/theora/lib/dec/x86/mmxfrag.c	2008-05-04 17:37:16 UTC (rev 14835)
@@ -21,6 +21,7 @@
   Note: Loops are unrolled for best performance.
   The iteration each instruction belongs to is marked in the comments as #i.*/
 #include "x86int.h"
+#include <stdlib.h>
 
 #if defined(USE_ASM)
 
@@ -133,8 +134,8 @@
     :[residue]"r"(_residue),
      [dst]"r"(_dst),
      [dst4]"r"(_dst+(_dst_ystride<<2)),
-     [dst_ystride]"r"((long)_dst_ystride),
-     [dst_ystride3]"r"((long)_dst_ystride*3)
+     [dst_ystride]"r"((ptrdiff_t)_dst_ystride),
+     [dst_ystride3]"r"((ptrdiff_t)_dst_ystride*3)
     :"memory"
   );
 }
@@ -185,8 +186,8 @@
       /*Advance dst.*/
       "lea (%[dst],%[dst_ystride],2),%[dst]\n\t"
       :[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src)
-      :[dst_ystride]"r"((long)_dst_ystride),
-       [src_ystride]"r"((long)_src_ystride)
+      :[dst_ystride]"r"((ptrdiff_t)_dst_ystride),
+       [src_ystride]"r"((ptrdiff_t)_src_ystride)
       :"memory"
     );
   }
@@ -278,7 +279,7 @@
       "lea (%[dst],%[ystride],2),%[dst]\n\t"
      :[dst]"+r"(_dst),[residue]"+r"(_residue),
       [src1]"+r"(_src1),[src2]"+r"(_src2)
-     :[ystride]"r"((long)_dst_ystride)
+     :[ystride]"r"((ptrdiff_t)_dst_ystride)
      :"memory"
     );
   }

Modified: trunk/theora/lib/dec/x86/mmxstate.c
===================================================================
--- trunk/theora/lib/dec/x86/mmxstate.c	2008-05-04 14:03:21 UTC (rev 14834)
+++ trunk/theora/lib/dec/x86/mmxstate.c	2008-05-04 17:37:16 UTC (rev 14835)
@@ -19,6 +19,7 @@
   Originally written by Rudolf Marek.*/
 #include "x86int.h"
 #include "../../internal.h"
+#include <stdlib.h>
 
 #if defined(USE_ASM)
 
@@ -182,9 +183,9 @@
   const int *fragi;
   const int *fragi_end;
   int        dst_framei;
-  long       dst_ystride;
+  ptrdiff_t  dst_ystride;
   int        src_framei;
-  long       src_ystride;
+  ptrdiff_t  src_ystride;
   dst_framei=_state->ref_frame_idx[_dst_frame];
   src_framei=_state->ref_frame_idx[_src_frame];
   dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
@@ -194,14 +195,14 @@
     oc_fragment   *frag;
     unsigned char *dst;
     unsigned char *src;
-    long           esi;
+    ptrdiff_t      s;
     frag=_state->frags+*fragi;
     dst=frag->buffer[dst_framei];
     src=frag->buffer[src_framei];
     __asm__ __volatile__(
       /*src+0*src_ystride*/
       "movq (%[src]),%%mm0\n\t"
-      /*esi=src_ystride*3*/
+      /*s=src_ystride*3*/
       "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
       /*src+1*src_ystride*/
       "movq (%[src],%[src_ystride]),%%mm1\n\t"
@@ -211,7 +212,7 @@
       "movq (%[src],%[s]),%%mm3\n\t"
       /*dst+0*dst_ystride*/
       "movq %%mm0,(%[dst])\n\t"
-      /*esi=dst_ystride*3*/
+      /*s=dst_ystride*3*/
       "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
       /*dst+1*dst_ystride*/
       "movq %%mm1,(%[dst],%[dst_ystride])\n\t"
@@ -225,7 +226,7 @@
       "lea (%[dst],%[dst_ystride],4),%[dst]\n\t"
       /*src+0*src_ystride*/
       "movq (%[src]),%%mm0\n\t"
-      /*esi=src_ystride*3*/
+      /*s=src_ystride*3*/
       "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
       /*src+1*src_ystride*/
       "movq (%[src],%[src_ystride]),%%mm1\n\t"
@@ -235,7 +236,7 @@
       "movq (%[src],%[s]),%%mm3\n\t"
       /*dst+0*dst_ystride*/
       "movq %%mm0,(%[dst])\n\t"
-      /*esi=dst_ystride*3*/
+      /*s=dst_ystride*3*/
       "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
       /*dst+1*dst_ystride*/
       "movq %%mm1,(%[dst],%[dst_ystride])\n\t"
@@ -243,7 +244,7 @@
       "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t"
       /*dst+3*dst_ystride*/
       "movq %%mm3,(%[dst],%[s])\n\t"
-      :[s]"=&S"(esi)
+      :[s]"=&r"(s)
       :[dst]"r"(dst),[src]"r"(src),[dst_ystride]"r"(dst_ystride),
        [src_ystride]"r"(src_ystride)
       :"memory"
@@ -255,12 +256,12 @@
 
 static void loop_filter_v(unsigned char *_pix,int _ystride,
  const ogg_int16_t *_ll){
-  long esi;
+  ptrdiff_t s;
   _pix-=_ystride*2;
   __asm__ __volatile__(
     /*mm0=0*/
     "pxor %%mm0,%%mm0\n\t"
-    /*esi=_ystride*3*/
+    /*s=_ystride*3*/
     "lea (%[ystride],%[ystride],2),%[s]\n\t"
     /*mm7=_pix[0...8]*/
     "movq (%[pix]),%%mm7\n\t"
@@ -427,8 +428,8 @@
     /*Write it back out.*/
     "movq %%mm4,(%[pix],%[ystride])\n\t"
     "movq %%mm1,(%[pix],%[ystride],2)\n\t"
-    :[s]"=&S"(esi)
-    :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll)
+    :[s]"=&r"(s)
+    :[pix]"r"(_pix),[ystride]"r"((ptrdiff_t)_ystride),[ll]"r"(_ll)
     :"memory"
   );
 }
@@ -437,14 +438,16 @@
   Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
    four p0's to one register we must transpose the values in four mmx regs.
   When half is done we repeat this for the rest.*/
-static void loop_filter_h4(unsigned char *_pix,long _ystride,
+static void loop_filter_h4(unsigned char *_pix,ptrdiff_t _ystride,
  const ogg_int16_t *_ll){
-  long esi;
-  long edi;
+  ptrdiff_t s;
+  /*d doesn't technically need to be 64-bit on x86-64, but making it so will
+     help avoid partial register stalls.*/
+  ptrdiff_t d;
   __asm__ __volatile__(
     /*x x x x 3 2 1 0*/
     "movd (%[pix]),%%mm0\n\t"
-    /*esi=_ystride*3*/
+    /*s=_ystride*3*/
     "lea (%[ystride],%[ystride],2),%[s]\n\t"
     /*x x x x 7 6 5 4*/
     "movd (%[pix],%[ystride]),%%mm1\n\t"
@@ -557,19 +560,19 @@
     "packuswb %%mm7,%%mm4\n\t"
     /*mm5=E D A 9 6 5 2 1*/
     "punpcklbw %%mm4,%%mm5\n\t"
-    /*edi=6 5 2 1*/
-    "movd %%mm5,%%edi\n\t"
-    "movw %%di,1(%[pix])\n\t"
+    /*d=6 5 2 1*/
+    "movd %%mm5,%[d]\n\t"
+    "movw %w[d],1(%[pix])\n\t"
     /*Why is there such a big stall here?*/
     "psrlq $32,%%mm5\n\t"
-    "shrl $16,%%edi\n\t"
-    "movw %%di,1(%[pix],%[ystride])\n\t"
-    /*edi=E D A 9*/
-    "movd %%mm5,%%edi\n\t"
-    "movw %%di,1(%[pix],%[ystride],2)\n\t"
-    "shrl $16,%%edi\n\t"
-    "movw %%di,1(%[pix],%[s])\n\t"
-    :[s]"=&S"(esi),[d]"=&D"(edi),
+    "shr $16,%[d]\n\t"
+    "movw %w[d],1(%[pix],%[ystride])\n\t"
+    /*d=E D A 9*/
+    "movd %%mm5,%[d]\n\t"
+    "movw %w[d],1(%[pix],%[ystride],2)\n\t"
+    "shr $16,%[d]\n\t"
+    "movw %w[d],1(%[pix],%[s])\n\t"
+    :[s]"=&r"(s),[d]"=&r"(d),
      [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
     :
     :"memory"



More information about the commits mailing list