[xiph-commits] r11513 - in branches/theora-playtime/lib: . x86_32_vs
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Sun Jun 4 02:46:49 PDT 2006
Author: illiminable
Date: 2006-06-04 02:46:34 -0700 (Sun, 04 Jun 2006)
New Revision: 11513
Modified:
branches/theora-playtime/lib/codec_internal.h
branches/theora-playtime/lib/dct_decode.c
branches/theora-playtime/lib/dsp.c
branches/theora-playtime/lib/dsp.h
branches/theora-playtime/lib/idct.c
branches/theora-playtime/lib/x86_32_vs/recon_sse2.c
Log:
* Remove the idct prototypes from codec_internal.h
* Change the idct calls in dct_decode to point to the dsp macros
* Add an init function for the idct to dsp.c
* Add prototypes and macros to dsp.h
* Add initialisation function to idct.c, rename functions to __c
* Remove some lea's from recon_intra8x8
* Add pre-fetches to recon_intra8x8
Modified: branches/theora-playtime/lib/codec_internal.h
===================================================================
--- branches/theora-playtime/lib/codec_internal.h 2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/codec_internal.h 2006-06-04 09:46:34 UTC (rev 11513)
@@ -715,7 +715,8 @@
extern void InitPBInstance(PB_INSTANCE *pbi);
extern void ClearPBInstance(PB_INSTANCE *pbi);
-
+/* ZEN::: Removed, they are in the DSP struct now */
+/*
extern void IDctSlow( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ) ;
@@ -727,7 +728,7 @@
extern void IDct1( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData );
-
+*/
extern void ReconIntra( PB_INSTANCE *pbi, unsigned char * ReconPtr,
ogg_int16_t * ChangePtr, ogg_uint32_t LineStep );
Modified: branches/theora-playtime/lib/dct_decode.c
===================================================================
--- branches/theora-playtime/lib/dct_decode.c 2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/dct_decode.c 2006-06-04 09:46:34 UTC (rev 11513)
@@ -135,13 +135,17 @@
/* Invert quantisation and DCT to get pixel data. */
switch(pbi->FragCoefEOB[FragmentNumber]){
case 0:case 1:
- IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ /* IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
+ dsp_idct_IDct1( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
break;
case 2: case 3:case 4:case 5:case 6:case 7:case 8: case 9:case 10:
- IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ /* IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
+ dsp_idct_IDct10( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
break;
default:
- IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ /* dsp_idct_IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
+ dsp_idct_IDctSlow( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+
}
/* Convert fragment number to a pixel offset in a reconstruction buffer. */
@@ -215,13 +219,16 @@
/* Invert quantisation and DCT to get pixel data. */
switch(pbi->FragCoefEOB[FragmentNumber]){
case 0:case 1:
- IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ /* IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
+ dsp_idct_IDct1( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
break;
case 2: case 3:case 4:case 5:case 6:case 7:case 8: case 9:case 10:
- IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ /* IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
+ dsp_idct_IDct10( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
break;
default:
- IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ dsp_idct_IDctSlow( pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ /* IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); */
}
/* Convert fragment number to a pixel offset in a reconstruction buffer. */
Modified: branches/theora-playtime/lib/dsp.c
===================================================================
--- branches/theora-playtime/lib/dsp.c 2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/dsp.c 2006-06-04 09:46:34 UTC (rev 11513)
@@ -409,6 +409,7 @@
dsp_recon_init (funcs, cpuflags);
dsp_dct_init (funcs, cpuflags);
+ dsp_idct_init (funcs, cpuflags);
#if defined(USE_ASM)
if (cpuflags & CPU_X86_MMX) {
dsp_mmx_init(funcs);
Modified: branches/theora-playtime/lib/dsp.h
===================================================================
--- branches/theora-playtime/lib/dsp.h 2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/dsp.h 2006-06-04 09:46:34 UTC (rev 11513)
@@ -19,6 +19,8 @@
#define DSP_H
#include <theora/theora.h>
+/*ZEN::: Added for QLIST_ENTRY */
+//#include "codec_internal.h"
typedef unsigned long int ogg_uint64_t;
typedef struct
@@ -78,6 +80,31 @@
ogg_uint32_t (*inter8x8_err_xy2)(unsigned char *SrcData, ogg_uint32_t SrcStride,
unsigned char *RefDataPtr1,
unsigned char *RefDataPtr2, ogg_uint32_t RefStride);
+ /* iDCT Functions */
+
+
+ void (*IDct1)( ogg_int16_t/*Q_LIST_ENTRY*/ * InputData, ogg_int16_t *QuantMatrix,
+ ogg_int16_t * OutputData );
+
+ void (*dequant_slow10)( ogg_int16_t * dequant_coeffs,
+ ogg_int16_t * quantized_list,
+ ogg_int32_t * DCT_block);
+
+ void (*IDct10)( ogg_int16_t/*Q_LIST_ENTRY*/ * InputData,
+ ogg_int16_t *QuantMatrix,
+ ogg_int16_t * OutputData );
+
+ void (*IDctSlow)( ogg_int16_t/*Q_LIST_ENTRY*/ * InputData,
+ ogg_int16_t *QuantMatrix,
+ ogg_int16_t * OutputData );
+
+ void (*dequant_slow)( ogg_int16_t * dequant_coeffs,
+ ogg_int16_t * quantized_list,
+ ogg_int32_t * DCT_block);
+
+
+
+
} DspFunctions;
extern void dsp_dct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
@@ -134,5 +161,21 @@
#define dsp_inter8x8_err_xy2(funcs,ptr1,str1,ptr2,ptr3,str2) \
(funcs.inter8x8_err_xy2 (ptr1,str1,ptr2,ptr3,str2))
+/* iDCT dispatch: call the matching function pointer held in a DspFunctions value (funcs). */
+#define dsp_idct_IDct1(funcs, ptr1, ptr2, ptr3) \
+ (funcs.IDct1 (ptr1, ptr2, ptr3))
+#define dsp_idct_dequant_slow10(funcs, ptr1, ptr2, ptr3) \
+ (funcs.dequant_slow10 (ptr1, ptr2, ptr3))
+
+#define dsp_idct_IDct10(funcs, ptr1, ptr2, ptr3) \
+ (funcs.IDct10 (ptr1, ptr2, ptr3))
+#define dsp_idct_IDctSlow(funcs, ptr1, ptr2, ptr3) \
+ (funcs.IDctSlow (ptr1, ptr2, ptr3))
+
+#define dsp_idct_dequant_slow(funcs, ptr1, ptr2, ptr3) \
+ (funcs.dequant_slow (ptr1, ptr2, ptr3))
+
+
+
#endif /* DSP_H */
Modified: branches/theora-playtime/lib/idct.c
===================================================================
--- branches/theora-playtime/lib/idct.c 2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/idct.c 2006-06-04 09:46:34 UTC (rev 11513)
@@ -31,6 +31,9 @@
#define xC7S1 12785
/* compute the 16 bit signed 1D inverse DCT - spec version */
+
+/* THIS IS NEVER CALLED */
+/*
static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) {
ogg_int32_t t[8], r;
ogg_int16_t *y = InputData;
@@ -108,8 +111,8 @@
x[7] = r;
}
-
-static void dequant_slow( ogg_int16_t * dequant_coeffs,
+*/
+static void dequant_slow__c( ogg_int16_t * dequant_coeffs,
ogg_int16_t * quantized_list,
ogg_int32_t * DCT_block) {
int i;
@@ -119,7 +122,7 @@
-void IDctSlow( Q_LIST_ENTRY * InputData,
+void IDctSlow__c( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ) {
ogg_int32_t IntermediateData[64];
@@ -132,7 +135,7 @@
int loop;
- dequant_slow( QuantMatrix, InputData, IntermediateData);
+ dequant_slow__c( QuantMatrix, InputData, IntermediateData);
/* Inverse DCT on the rows now */
for ( loop = 0; loop < 8; loop++){
@@ -338,7 +341,7 @@
0 0 0 0 0 0 0 0
*************************/
-static void dequant_slow10( ogg_int16_t * dequant_coeffs,
+static void dequant_slow10__c( ogg_int16_t * dequant_coeffs,
ogg_int16_t * quantized_list,
ogg_int32_t * DCT_block){
int i;
@@ -348,7 +351,7 @@
}
-void IDct10( Q_LIST_ENTRY * InputData,
+void IDct10__c( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ){
ogg_int32_t IntermediateData[64];
@@ -361,7 +364,7 @@
int loop;
- dequant_slow10( QuantMatrix, InputData, IntermediateData);
+ dequant_slow10__c( QuantMatrix, InputData, IntermediateData);
/* Inverse DCT on the rows now */
for ( loop = 0; loop < 4; loop++){
@@ -540,7 +543,7 @@
0 0 0 0 0 0 0 0
**************************/
-void IDct1( Q_LIST_ENTRY * InputData,
+void IDct1__c( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ){
int loop;
@@ -553,3 +556,23 @@
OutputData[loop]=OutD;
}
+/* Install the C (__c) iDCT and dequantisation routines from this file into funcs. */
+/* cpu_flags is not read yet: the USE_ASM overrides below are still commented out. */
+void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
+{
+ /* TODO::: Match function order (presumably the member order of DspFunctions - confirm) */
+ funcs->IDct1 = IDct1__c;
+ funcs->IDct10 = IDct10__c;
+ funcs->dequant_slow10 = dequant_slow10__c;
+ funcs->IDctSlow = IDctSlow__c;
+ funcs->dequant_slow = dequant_slow__c;
+#if defined(USE_ASM)
+ //if (cpu_flags & CPU_X86_MMX) {
+ // dsp_mmx_idct_init(funcs);
+ //}
+
+ //if (cpu_flags & CPU_X86_SSE2) {
+ // dsp_sse2_idct_init(funcs);
+ //}
+#endif
+}
\ No newline at end of file
Modified: branches/theora-playtime/lib/x86_32_vs/recon_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/recon_sse2.c 2006-06-04 06:43:35 UTC (rev 11512)
+++ branches/theora-playtime/lib/x86_32_vs/recon_sse2.c 2006-06-04 09:46:34 UTC (rev 11513)
@@ -54,6 +54,7 @@
__asm {
align 16
+
/* Load the parameters into the general registers */
mov eax, src
mov ebx, dest
@@ -152,7 +153,12 @@
mov ecx, LineStep
mov edx, V128x16Ptr
+ prefetchnta [ebx]
+ prefetchnta [ebx + 32]
+ prefetchnta [ebx + 64]
+ prefetchnta [ebx + 96]
+
movdqa xmm7, [edx]
/* 8 lots of int16 per register on the first mov */
/* Then packs those 8 + another 8 down to 16x 8 bits */
@@ -164,23 +170,23 @@
movdqa xmm6, [ebx + 16]
packsswb xmm0, xmm6 /*[ebx + 16]*/
pxor xmm0, xmm7
- lea ebx, [ebx + 32]
+ //lea ebx, [ebx + 32]
/* Iteration 2 - xmm1*/
- movdqa xmm1, [ebx]
- packsswb xmm1, [ebx + 16]
+ movdqa xmm1, [ebx + 32]
+ packsswb xmm1, [ebx + 48]
pxor xmm1, xmm7
- lea ebx, [ebx + 32]
+ //lea ebx, [ebx + 32]
/* Iteration 3 - xmm2 */
- movdqa xmm2, [ebx]
- packsswb xmm2, [ebx + 16]
+ movdqa xmm2, [ebx + 64]
+ packsswb xmm2, [ebx + 80]
pxor xmm2, xmm7
- lea ebx, [ebx + 32]
+ //lea ebx, [ebx + 32]
/* Iteration 4 - xmm3 */
- movdqa xmm3, [ebx]
- packsswb xmm3, [ebx + 16]
+ movdqa xmm3, [ebx + 96]
+ packsswb xmm3, [ebx + 112]
pxor xmm3, xmm7
/* lea ebx, [ebx + 16] */
More information about the commits
mailing list