[xiph-commits] r11513 - in branches/theora-playtime/lib: . x86_32_vs

illiminable at svn.xiph.org illiminable at svn.xiph.org
Sun Jun 4 02:46:49 PDT 2006


Author: illiminable
Date: 2006-06-04 02:46:34 -0700 (Sun, 04 Jun 2006)
New Revision: 11513

Modified:
   branches/theora-playtime/lib/codec_internal.h
   branches/theora-playtime/lib/dct_decode.c
   branches/theora-playtime/lib/dsp.c
   branches/theora-playtime/lib/dsp.h
   branches/theora-playtime/lib/idct.c
   branches/theora-playtime/lib/x86_32_vs/recon_sse2.c
Log:
* Remove the idct prototypes from codec_internal.h
* Change the idct calls in dct_decode to point to the dsp macros
* Add an init function for the idct to dsp.c
* Add prototypes and macros to dsp.h
* Add initialisation function to idct.c, rename functions to __c
* Remove some lea's from recon_intra8x8
* Add pre-fetches to recon_intra8x8


Modified: branches/theora-playtime/lib/codec_internal.h
===================================================================
--- branches/theora-playtime/lib/codec_internal.h	2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/codec_internal.h	2006-06-04 09:46:34 UTC (rev 11513)
@@ -715,7 +715,8 @@
 extern void InitPBInstance(PB_INSTANCE *pbi);
 extern void ClearPBInstance(PB_INSTANCE *pbi);
 
-
+/* ZEN::: Removed, they are in the DSP struct now */
+/*
 extern void IDctSlow(  Q_LIST_ENTRY * InputData,
                        ogg_int16_t *QuantMatrix,
                        ogg_int16_t * OutputData ) ;
@@ -727,7 +728,7 @@
 extern void IDct1( Q_LIST_ENTRY * InputData,
                    ogg_int16_t *QuantMatrix,
                    ogg_int16_t * OutputData );
-
+*/
 extern void ReconIntra( PB_INSTANCE *pbi, unsigned char * ReconPtr,
                         ogg_int16_t * ChangePtr, ogg_uint32_t LineStep );
 

Modified: branches/theora-playtime/lib/dct_decode.c
===================================================================
--- branches/theora-playtime/lib/dct_decode.c	2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/dct_decode.c	2006-06-04 09:46:34 UTC (rev 11513)
@@ -135,13 +135,17 @@
   /* Invert quantisation and DCT to get pixel data. */
   switch(pbi->FragCoefEOB[FragmentNumber]){
   case 0:case 1:
-    IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+    /* IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
+    dsp_idct_IDct1( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
     break;
   case 2: case 3:case 4:case 5:case 6:case 7:case 8: case 9:case 10:
-    IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+    /* IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
+    dsp_idct_IDct10( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
     break;
   default:
-    IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+     /* dsp_idct_IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
+     dsp_idct_IDctSlow( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); 
+
   }
 
   /* Convert fragment number to a pixel offset in a reconstruction buffer. */
@@ -215,13 +219,16 @@
   /* Invert quantisation and DCT to get pixel data. */
   switch(pbi->FragCoefEOB[FragmentNumber]){
   case 0:case 1:
-    IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+    /* IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
+    dsp_idct_IDct1( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
     break;
   case 2: case 3:case 4:case 5:case 6:case 7:case 8: case 9:case 10:
-    IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+    /* IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
+    dsp_idct_IDct10( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
     break;
   default:
-    IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+    dsp_idct_IDctSlow( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+    /* IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
   }
 
   /* Convert fragment number to a pixel offset in a reconstruction buffer. */

Modified: branches/theora-playtime/lib/dsp.c
===================================================================
--- branches/theora-playtime/lib/dsp.c	2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/dsp.c	2006-06-04 09:46:34 UTC (rev 11513)
@@ -409,6 +409,7 @@
 
   dsp_recon_init (funcs, cpuflags);
   dsp_dct_init (funcs, cpuflags);
+  dsp_idct_init (funcs, cpuflags);
 #if defined(USE_ASM)
   if (cpuflags & CPU_X86_MMX) {
     dsp_mmx_init(funcs);

Modified: branches/theora-playtime/lib/dsp.h
===================================================================
--- branches/theora-playtime/lib/dsp.h	2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/dsp.h	2006-06-04 09:46:34 UTC (rev 11513)
@@ -19,6 +19,8 @@
 #define DSP_H
 
 #include <theora/theora.h>
+/*ZEN::: Added for QLIST_ENTRY */
+//#include "codec_internal.h"
 typedef unsigned long int ogg_uint64_t;
 
 typedef struct
@@ -78,6 +80,31 @@
   ogg_uint32_t (*inter8x8_err_xy2)(unsigned char *SrcData, ogg_uint32_t SrcStride,
 		                 unsigned char *RefDataPtr1,
 			         unsigned char *RefDataPtr2, ogg_uint32_t RefStride);
+  /* iDCT Functions */
+
+
+  void (*IDct1)( ogg_int16_t/*Q_LIST_ENTRY*/ * InputData, ogg_int16_t *QuantMatrix,
+                        ogg_int16_t * OutputData );
+
+  void (*dequant_slow10)( ogg_int16_t * dequant_coeffs,
+                     ogg_int16_t * quantized_list,
+                     ogg_int32_t * DCT_block);
+
+  void (*IDct10)( ogg_int16_t/*Q_LIST_ENTRY*/ * InputData,
+             ogg_int16_t *QuantMatrix,
+             ogg_int16_t * OutputData );
+
+  void (*IDctSlow)(  ogg_int16_t/*Q_LIST_ENTRY*/ * InputData,
+                ogg_int16_t *QuantMatrix,
+                ogg_int16_t * OutputData );
+
+  void (*dequant_slow)( ogg_int16_t * dequant_coeffs,
+                   ogg_int16_t * quantized_list,
+                   ogg_int32_t * DCT_block);
+
+
+
+
 } DspFunctions;
 
 extern void dsp_dct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
@@ -134,5 +161,21 @@
 #define dsp_inter8x8_err_xy2(funcs,ptr1,str1,ptr2,ptr3,str2) \
 	(funcs.inter8x8_err_xy2 (ptr1,str1,ptr2,ptr3,str2))
 
+#define dsp_idct_IDct1(funcs, ptr1, ptr2, ptr3) \
+            (funcs.IDct1 (ptr1, ptr2, ptr3))
 
+#define dsp_idct_dequant_slow10(funcs, ptr1, ptr2, ptr3) \
+            (funcs.dequant_slow10 (ptr1, ptr2, ptr3))
+
+#define dsp_idct_IDct10(funcs, ptr1, ptr2, ptr3) \
+            (funcs.IDct10 (ptr1, ptr2, ptr3))
+
+#define dsp_idct_IDctSlow(funcs, ptr1, ptr2, ptr3) \
+            (funcs.IDctSlow (ptr1, ptr2, ptr3))
+
+#define dsp_idct_dequant_slow(funcs, ptr1, ptr2, ptr3) \
+            (funcs.dequant_slow (ptr1, ptr2, ptr3))
+
+
+
 #endif /* DSP_H */

Modified: branches/theora-playtime/lib/idct.c
===================================================================
--- branches/theora-playtime/lib/idct.c	2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/idct.c	2006-06-04 09:46:34 UTC (rev 11513)
@@ -31,6 +31,9 @@
 #define xC7S1 12785
 
 /* compute the 16 bit signed 1D inverse DCT - spec version */
+
+/* THIS IS NEVER CALLED */
+/*
 static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) {
   ogg_int32_t t[8], r;
   ogg_int16_t *y = InputData;
@@ -108,8 +111,8 @@
   x[7] = r;
 
 }
-
-static void dequant_slow( ogg_int16_t * dequant_coeffs,
+*/
+static void dequant_slow__c( ogg_int16_t * dequant_coeffs,
                    ogg_int16_t * quantized_list,
                    ogg_int32_t * DCT_block) {
   int i;
@@ -119,7 +122,7 @@
 
 
 
-void IDctSlow(  Q_LIST_ENTRY * InputData,
+void IDctSlow__c(  Q_LIST_ENTRY * InputData,
                 ogg_int16_t *QuantMatrix,
                 ogg_int16_t * OutputData ) {
   ogg_int32_t IntermediateData[64];
@@ -132,7 +135,7 @@
 
   int loop;
 
-  dequant_slow( QuantMatrix, InputData, IntermediateData);
+  dequant_slow__c( QuantMatrix, InputData, IntermediateData);
 
   /* Inverse DCT on the rows now */
   for ( loop = 0; loop < 8; loop++){
@@ -338,7 +341,7 @@
   0  0  0  0  0  0  0  0
 *************************/
 
-static void dequant_slow10( ogg_int16_t * dequant_coeffs,
+static void dequant_slow10__c( ogg_int16_t * dequant_coeffs,
                      ogg_int16_t * quantized_list,
                      ogg_int32_t * DCT_block){
   int i;
@@ -348,7 +351,7 @@
 
 }
 
-void IDct10( Q_LIST_ENTRY * InputData,
+void IDct10__c( Q_LIST_ENTRY * InputData,
              ogg_int16_t *QuantMatrix,
              ogg_int16_t * OutputData ){
   ogg_int32_t IntermediateData[64];
@@ -361,7 +364,7 @@
 
   int loop;
 
-  dequant_slow10( QuantMatrix, InputData, IntermediateData);
+  dequant_slow10__c( QuantMatrix, InputData, IntermediateData);
 
   /* Inverse DCT on the rows now */
   for ( loop = 0; loop < 4; loop++){
@@ -540,7 +543,7 @@
   0   0   0  0  0  0  0  0
 **************************/
 
-void IDct1( Q_LIST_ENTRY * InputData,
+void IDct1__c( Q_LIST_ENTRY * InputData,
             ogg_int16_t *QuantMatrix,
             ogg_int16_t * OutputData ){
   int loop;
@@ -553,3 +556,23 @@
     OutputData[loop]=OutD;
 
 }
+
+
+void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
+{
+    /* TODO::: Match function order */
+  funcs->IDct1 = IDct1__c;
+  funcs->IDct10 = IDct10__c;
+  funcs->dequant_slow10 = dequant_slow10__c;
+  funcs->IDctSlow = IDctSlow__c;
+  funcs->dequant_slow = dequant_slow__c;
+#if defined(USE_ASM)
+  //if (cpu_flags & CPU_X86_MMX) {
+  //  dsp_mmx_idct_init(funcs);
+  //}
+
+  //if (cpu_flags & CPU_X86_SSE2) {
+  //  dsp_sse2_idct_init(funcs);
+  //}
+#endif
+}
\ No newline at end of file

Modified: branches/theora-playtime/lib/x86_32_vs/recon_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/recon_sse2.c	2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/x86_32_vs/recon_sse2.c	2006-06-04 09:46:34 UTC (rev 11513)
@@ -54,6 +54,7 @@
     __asm {
         align 16
 
+
         /* Load the parameters into the general registers */
         mov         eax, src
         mov         ebx, dest
@@ -152,7 +153,12 @@
         mov     ecx, LineStep
         mov     edx, V128x16Ptr
 
+        prefetchnta    [ebx]
+        prefetchnta    [ebx + 32]
+        prefetchnta    [ebx + 64]
+        prefetchnta    [ebx + 96]
 
+
         movdqa      xmm7, [edx]
         /* 8 lots of int16 per register on the first mov */
         /* Then packs those 8 + another 8 down to 16x 8 bits */
@@ -164,23 +170,23 @@
         movdqa      xmm6, [ebx + 16]
         packsswb    xmm0, xmm6  /*[ebx + 16]*/
         pxor        xmm0, xmm7
-        lea         ebx, [ebx + 32]
+        //lea         ebx, [ebx + 32]
 
         /* Iteration 2 - xmm1*/
-        movdqa      xmm1, [ebx]
-        packsswb    xmm1, [ebx + 16]
+        movdqa      xmm1, [ebx + 32]
+        packsswb    xmm1, [ebx + 48]
         pxor        xmm1, xmm7
-        lea         ebx, [ebx + 32]
+        //lea         ebx, [ebx + 32]
 
         /* Iteration 3 - xmm2 */
-        movdqa      xmm2, [ebx]
-        packsswb    xmm2, [ebx + 16]
+        movdqa      xmm2, [ebx + 64]
+        packsswb    xmm2, [ebx + 80]
         pxor        xmm2, xmm7
-        lea         ebx, [ebx + 32]
+        //lea         ebx, [ebx + 32]
 
         /* Iteration 4 - xmm3 */
-        movdqa      xmm3, [ebx]
-        packsswb    xmm3, [ebx + 16]
+        movdqa      xmm3, [ebx + 96]
+        packsswb    xmm3, [ebx + 112]
         pxor        xmm3, xmm7
         /* lea         ebx, [ebx + 16] */
 



More information about the commits mailing list