[xiph-commits] r15556 - in branches/theora-thusnelda/lib: . enc enc/x86_64
xiphmont at svn.xiph.org
xiphmont at svn.xiph.org
Thu Dec 4 21:40:38 PST 2008
Author: xiphmont
Date: 2008-12-04 21:40:36 -0800 (Thu, 04 Dec 2008)
New Revision: 15556
Modified:
branches/theora-thusnelda/lib/Makefile.am
branches/theora-thusnelda/lib/enc/codec_internal.h
branches/theora-thusnelda/lib/enc/dct_encode.c
branches/theora-thusnelda/lib/enc/dsp.c
branches/theora-thusnelda/lib/enc/dsp.h
branches/theora-thusnelda/lib/enc/encoder_idct.c
branches/theora-thusnelda/lib/enc/encoder_quant.c
branches/theora-thusnelda/lib/enc/encoder_toplevel.c
branches/theora-thusnelda/lib/enc/frinit.c
branches/theora-thusnelda/lib/enc/mcenc.c
branches/theora-thusnelda/lib/enc/mode.c
branches/theora-thusnelda/lib/enc/reconstruct.c
branches/theora-thusnelda/lib/enc/x86_64/dsp_mmx.c
branches/theora-thusnelda/lib/enc/x86_64/dsp_mmxext.c
branches/theora-thusnelda/lib/enc/x86_64/fdct_mmx.c
branches/theora-thusnelda/lib/enc/x86_64/idct_mmx.c
branches/theora-thusnelda/lib/enc/x86_64/recon_mmx.c
branches/theora-thusnelda/lib/internal.h
Log:
A rousing round of functional and build cleanup, some const-ing, some more
skip block refinement.
Modified: branches/theora-thusnelda/lib/Makefile.am
===================================================================
--- branches/theora-thusnelda/lib/Makefile.am 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/Makefile.am 2008-12-05 05:40:36 UTC (rev 15556)
@@ -3,17 +3,12 @@
LIBADD = $(OGG_LIBS)
EXTRA_DIST = \
- enc/x86_32/dct_decode_mmx.c \
- enc/x86_32/dsp_mmx.c \
- enc/x86_32/dsp_mmxext.c \
- enc/x86_32/recon_mmx.c \
- enc/x86_32/fdct_mmx.c \
- enc/x86_32/idct_mmx.c \
- enc/x86_64/dsp_mmx.c \
- enc/x86_64/dsp_mmxext.c \
- enc/x86_64/recon_mmx.c \
- enc/x86_64/fdct_mmx.c \
- enc/x86_64/idct_mmx.c \
+ enc/x86/dct_decode_mmx.c \
+ enc/x86/dsp_mmx.c \
+ enc/x86/dsp_mmxext.c \
+ enc/x86/recon_mmx.c \
+ enc/x86/fdct_mmx.c \
+ enc/x86/idct_mmx.c \
enc/x86_32_vs/dsp_mmx.c \
enc/x86_32_vs/fdct_mmx.c \
enc/x86_32_vs/recon_mmx.c \
@@ -43,25 +38,19 @@
enc/reconstruct.c \
enc/dsp.c
+encoder_x86_sources = \
+ enc/x86/dct_decode_mmx.c \
+ enc/x86/dsp_mmx.c \
+ enc/x86/dsp_mmxext.c \
+ enc/x86/recon_mmx.c \
+ enc/x86/idct_mmx.c \
+ enc/x86/fdct_mmx.c
+
if CPU_x86_64
-enc_arch_dir = enc/x86_64
-encoder_arch_sources= \
- $(enc_arch_dir)/dct_decode_mmx.c \
- $(enc_arch_dir)/dsp_mmx.c \
- $(enc_arch_dir)/dsp_mmxext.c \
- $(enc_arch_dir)/recon_mmx.c \
- $(enc_arch_dir)/idct_mmx.c \
- $(enc_arch_dir)/fdct_mmx.c
+encoder_arch_sources = $(encoder_x86_sources)
else
if CPU_x86_32
-enc_arch_dir = enc/x86_32
-encoder_arch_sources= \
- $(enc_arch_dir)/dct_decode_mmx.c \
- $(enc_arch_dir)/dsp_mmx.c \
- $(enc_arch_dir)/dsp_mmxext.c \
- $(enc_arch_dir)/recon_mmx.c \
- $(enc_arch_dir)/idct_mmx.c \
- $(enc_arch_dir)/fdct_mmx.c
+encoder_arch_sources = $(encoder_x86_sources)
endif
endif
Modified: branches/theora-thusnelda/lib/enc/codec_internal.h
===================================================================
--- branches/theora-thusnelda/lib/enc/codec_internal.h 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/codec_internal.h 2008-12-05 05:40:36 UTC (rev 15556)
@@ -318,8 +318,8 @@
#define clamp255(x) ((unsigned char)((((x)<0)-1) & ((x) | -((x)>255))))
-extern void IDct1( ogg_int16_t *InputData,
- ogg_int16_t *QuantMatrix,
+extern void IDct1( const ogg_int16_t *InputData,
+ const ogg_int16_t *QuantMatrix,
ogg_int16_t *OutputData );
extern void ReconRefFrames (CP_INSTANCE *cpi);
@@ -345,12 +345,12 @@
int n);
extern void dct_tokenize_init (CP_INSTANCE *cpi);
extern int dct_tokenize_AC (CP_INSTANCE *cpi,
- int fi,
- ogg_int16_t *dct,
- ogg_int16_t *dequant,
- ogg_int16_t *origdct,
- int chroma,
- token_checkpoint_t **stack);
+ const int fi,
+ ogg_int16_t *dct,
+ const ogg_int16_t *dequant,
+ const ogg_int16_t *origdct,
+ const int chroma,
+ token_checkpoint_t **stack);
extern void dct_tokenize_finish (CP_INSTANCE *cpi);
extern void dct_tokenize_mark_ac_chroma (CP_INSTANCE *cpi);
Modified: branches/theora-thusnelda/lib/enc/dct_encode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/dct_encode.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/dct_encode.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -468,80 +468,72 @@
simply assume there will be a nonzero DC value and code. That's
not a true assumption but it can be fixed-up as DC is tokenized
later */
-int dct_tokenize_AC(CP_INSTANCE *cpi, int fi,
- ogg_int16_t *dct, ogg_int16_t *dequant, ogg_int16_t *origdct,
- int chroma, token_checkpoint_t **stack){
+int dct_tokenize_AC(CP_INSTANCE *cpi, const int fi,
+ ogg_int16_t *dct, const ogg_int16_t *dequant,
+ const ogg_int16_t *origdct, const int chroma,
+ token_checkpoint_t **stack){
int coeff = 1; /* skip DC for now */
+ int i = coeff;
int retcost = 0;
- while(coeff < BLOCK_SIZE){
- int i = coeff;
- int ret;
- while( !dct[i] && (++i < BLOCK_SIZE) );
+ while( !dct[i] && (++i < BLOCK_SIZE) );
- if ( i == BLOCK_SIZE ){
-
- retcost += tokenize_mark_run(cpi,chroma,fi,coeff>1,coeff,stack);
- coeff = BLOCK_SIZE;
- }else{
+ while(i < BLOCK_SIZE){
+ int ret;
- /* determine costs for encoding this value (and any preceeding
- eobrun/zerorun) as well as the cost for encoding a demoted token */
- int costA = tokenize_dctcost(cpi,chroma,coeff,i,dct[i]),costB;
- int costD = costA;
- int dval = (dct[i]>0 ? dct[i]-1 : dct[i]+1);
- int j=i;
- if(dval){
- /* demoting will not produce a zero. */
- costD -= costB = tokenize_dctcost(cpi,chroma,coeff,i,dval);
+ /* determine costs for encoding this value (and any preceding
+ eobrun/zerorun) as well as the cost for encoding a demoted token */
+ int costA = tokenize_dctcost(cpi,chroma,coeff,i,dct[i]),costB;
+ int costD = costA;
+ int dval = (dct[i]>0 ? dct[i]-1 : dct[i]+1);
+ int j=i+1;
+ while((j < BLOCK_SIZE) && !dct[j] ) j++;
+
+ if(dval){
+ /* demoting will not produce a zero. */
+ costD -= costB = tokenize_dctcost(cpi,chroma,coeff,i,dval);
+ }else{
+ /* demoting token will produce a zero. */
+ costB = 0;
+ if(j==BLOCK_SIZE){
+ costD += tokenize_eobcost(cpi,chroma,i+1);
+ costD -= tokenize_eobcost(cpi,chroma,coeff);
}else{
- /* demoting token will produce a zero. */
- j=i+1;
- costB = 0;
- while((j < BLOCK_SIZE) && !dct[j] ) j++;
- if(j==BLOCK_SIZE){
- costD += tokenize_eobcost(cpi,chroma,i+1);
- costD -= tokenize_eobcost(cpi,chroma,coeff);
- }else{
- costD += tokenize_dctcost(cpi,chroma,i+1,j,dct[j]);
- costD -= tokenize_dctcost(cpi,chroma,coeff,j,dct[j]);
- }
+ costD += tokenize_dctcost(cpi,chroma,i+1,j,dct[j]);
+ costD -= tokenize_dctcost(cpi,chroma,coeff,j,dct[j]);
}
+ }
- if(costD>0){
- /* demoting results in a cheaper token cost. Is the bit savings worth the added distortion? */
- int ii = dezigzag_index[i];
- int od = dct[i]*dequant[i] - origdct[ii];
- int dd = dval*dequant[i] - origdct[ii];
- int delta = dd*dd - od*od;
-
- if(delta < costD*cpi->token_lambda){
- /* we have a winner. Demote token */
- dct[i]=dval;
- costA=costB;
-
- if(dval==0){
- if(j==BLOCK_SIZE){
- retcost += tokenize_mark_run(cpi,chroma,fi,coeff>1,coeff,stack);
- coeff = BLOCK_SIZE;
- break;
- }else{
- i=j;
- continue;
- }
- }
+ if(costD>0){
+ /* demoting results in a cheaper token cost. Is the bit savings worth the added distortion? */
+ int ii = dezigzag_index[i];
+ int od = dct[i]*dequant[i] - origdct[ii];
+ int dd = dval*dequant[i] - origdct[ii];
+ int delta = dd*dd - od*od;
+
+ if(delta < costD*cpi->token_lambda){
+ /* we have a winner. Demote token */
+ dct[i]=dval;
+ costA=costB;
+
+ if(dval==0){
+ if(j==BLOCK_SIZE) break;
+ i=j;
+ continue;
}
}
- retcost+=costA;
-
- ret = tokenize_dctval(cpi, chroma, fi, coeff, i, dct[i], stack);
- if(!ret)
- tokenize_dctval(cpi, chroma, fi, i, i, dct[i], stack);
- coeff=i+1;
-
}
+
+ retcost+=costA;
+
+ ret = tokenize_dctval(cpi, chroma, fi, coeff, i, dct[i], stack);
+ if(!ret)
+ tokenize_dctval(cpi, chroma, fi, i, i, dct[i], stack);
+ coeff=i+1;
+ i=j;
+
}
- return retcost;
+ return retcost+tokenize_mark_run(cpi,chroma,fi,coeff>1,coeff,stack);
}
/* called after AC tokenization is complete, because DC coding has to
Modified: branches/theora-thusnelda/lib/enc/dsp.c
===================================================================
--- branches/theora-thusnelda/lib/enc/dsp.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/dsp.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -43,7 +43,7 @@
memset(ptr,val,8);
}
-static void sub8x8__c (unsigned char *FiltPtr, unsigned char *ReconPtr,
+static void sub8x8__c (const unsigned char *FiltPtr, const unsigned char *ReconPtr,
ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine){
int i;
@@ -65,7 +65,7 @@
}
}
-static void sub8x8_128__c (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
+static void sub8x8_128__c (const unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
ogg_uint32_t PixelsPerLine) {
int i;
/* For each block row */
@@ -89,99 +89,8 @@
}
}
-static void sub8x8avg2__c (unsigned char *FiltPtr, unsigned char *ReconPtr1,
- unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine) {
-
- int i;
-
- /* For each block row */
- for (i=8; i; i--) {
- DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0]));
- DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1]));
- DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2]));
- DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3]));
- DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], ReconPtr2[4]));
- DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5]));
- DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6]));
- DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7]));
-
- /* Start next row */
- FiltPtr += PixelsPerLine;
- ReconPtr1 += PixelsPerLine;
- ReconPtr2 += PixelsPerLine;
- DctInputPtr += 8;
- }
-}
-
-static ogg_uint32_t row_sad8__c (unsigned char *Src1, unsigned char *Src2)
-{
- ogg_uint32_t SadValue;
- ogg_uint32_t SadValue1;
-
- SadValue = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) +
- DSP_OP_ABS_DIFF (Src1[1], Src2[1]) +
- DSP_OP_ABS_DIFF (Src1[2], Src2[2]) +
- DSP_OP_ABS_DIFF (Src1[3], Src2[3]);
-
- SadValue1 = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) +
- DSP_OP_ABS_DIFF (Src1[5], Src2[5]) +
- DSP_OP_ABS_DIFF (Src1[6], Src2[6]) +
- DSP_OP_ABS_DIFF (Src1[7], Src2[7]);
-
- SadValue = ( SadValue > SadValue1 ) ? SadValue : SadValue1;
-
- return SadValue;
-}
-
-static ogg_uint32_t col_sad8x8__c (unsigned char *Src1, unsigned char *Src2,
- ogg_uint32_t stride)
-{
- ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0};
- ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0};
- ogg_uint32_t MaxSad = 0;
- ogg_uint32_t i;
-
- for ( i = 0; i < 4; i++ ){
- SadValue[0] += abs(Src1[0] - Src2[0]);
- SadValue[1] += abs(Src1[1] - Src2[1]);
- SadValue[2] += abs(Src1[2] - Src2[2]);
- SadValue[3] += abs(Src1[3] - Src2[3]);
- SadValue[4] += abs(Src1[4] - Src2[4]);
- SadValue[5] += abs(Src1[5] - Src2[5]);
- SadValue[6] += abs(Src1[6] - Src2[6]);
- SadValue[7] += abs(Src1[7] - Src2[7]);
-
- Src1 += stride;
- Src2 += stride;
- }
-
- for ( i = 0; i < 4; i++ ){
- SadValue2[0] += abs(Src1[0] - Src2[0]);
- SadValue2[1] += abs(Src1[1] - Src2[1]);
- SadValue2[2] += abs(Src1[2] - Src2[2]);
- SadValue2[3] += abs(Src1[3] - Src2[3]);
- SadValue2[4] += abs(Src1[4] - Src2[4]);
- SadValue2[5] += abs(Src1[5] - Src2[5]);
- SadValue2[6] += abs(Src1[6] - Src2[6]);
- SadValue2[7] += abs(Src1[7] - Src2[7]);
-
- Src1 += stride;
- Src2 += stride;
- }
-
- for ( i = 0; i < 8; i++ ){
- if ( SadValue[i] > MaxSad )
- MaxSad = SadValue[i];
- if ( SadValue2[i] > MaxSad )
- MaxSad = SadValue2[i];
- }
-
- return MaxSad;
-}
-
-static ogg_uint32_t sad8x8__c (unsigned char *ptr1,
- unsigned char *ptr2,
+static ogg_uint32_t sad8x8__c (const unsigned char *ptr1,
+ const unsigned char *ptr2,
ogg_uint32_t stride)
{
ogg_uint32_t i;
@@ -205,8 +114,8 @@
return sad;
}
-static ogg_uint32_t sad8x8_thres__c (unsigned char *ptr1,
- unsigned char *ptr2,
+static ogg_uint32_t sad8x8_thres__c (const unsigned char *ptr1,
+ const unsigned char *ptr2,
ogg_uint32_t stride,
ogg_uint32_t thres)
{
@@ -234,9 +143,9 @@
return sad;
}
-static ogg_uint32_t sad8x8_xy2_thres__c (unsigned char *SrcData,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2,
+static ogg_uint32_t sad8x8_xy2_thres__c (const unsigned char *SrcData,
+ const unsigned char *RefDataPtr1,
+ const unsigned char *RefDataPtr2,
ogg_uint32_t Stride,
ogg_uint32_t thres)
{
@@ -265,143 +174,6 @@
return sad;
}
-static ogg_uint32_t intra8x8_err__c (unsigned char *DataPtr, ogg_uint32_t Stride)
-{
- ogg_uint32_t i;
- ogg_uint32_t XSum=0;
- ogg_uint32_t XXSum=0;
-
- for (i=8; i; i--) {
- /* Examine alternate pixel locations. */
- XSum += DataPtr[0];
- XXSum += DataPtr[0]*DataPtr[0];
- XSum += DataPtr[1];
- XXSum += DataPtr[1]*DataPtr[1];
- XSum += DataPtr[2];
- XXSum += DataPtr[2]*DataPtr[2];
- XSum += DataPtr[3];
- XXSum += DataPtr[3]*DataPtr[3];
- XSum += DataPtr[4];
- XXSum += DataPtr[4]*DataPtr[4];
- XSum += DataPtr[5];
- XXSum += DataPtr[5]*DataPtr[5];
- XSum += DataPtr[6];
- XXSum += DataPtr[6]*DataPtr[6];
- XSum += DataPtr[7];
- XXSum += DataPtr[7]*DataPtr[7];
-
- /* Step to next row of block. */
- DataPtr += Stride;
- }
-
- /* Compute population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ) );
-}
-
-static ogg_uint32_t inter8x8_err__c (unsigned char *SrcData,
- unsigned char *RefDataPtr,
- ogg_uint32_t Stride)
-{
- ogg_uint32_t i;
- ogg_uint32_t XSum=0;
- ogg_uint32_t XXSum=0;
- ogg_int32_t DiffVal;
-
- for (i=8; i; i--) {
- DiffVal = DSP_OP_DIFF (SrcData[0], RefDataPtr[0]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[1], RefDataPtr[1]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[2], RefDataPtr[2]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[3], RefDataPtr[3]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[4], RefDataPtr[4]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[5], RefDataPtr[5]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[6], RefDataPtr[6]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[7], RefDataPtr[7]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- /* Step to next row of block. */
- SrcData += Stride;
- RefDataPtr += Stride;
- }
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
-static ogg_uint32_t inter8x8_err_xy2__c (unsigned char *SrcData,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2,
- ogg_uint32_t Stride)
-{
- ogg_uint32_t i;
- ogg_uint32_t XSum=0;
- ogg_uint32_t XXSum=0;
- ogg_int32_t DiffVal;
-
- for (i=8; i; i--) {
- DiffVal = DSP_OP_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- /* Step to next row of block. */
- SrcData += Stride;
- RefDataPtr1 += Stride;
- RefDataPtr2 += Stride;
- }
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
static void nop (void) { /* NOP */ }
void dsp_init(DspFunctions *funcs)
@@ -411,15 +183,9 @@
funcs->set8x8 = set8x8__c;
funcs->sub8x8 = sub8x8__c;
funcs->sub8x8_128 = sub8x8_128__c;
- funcs->sub8x8avg2 = sub8x8avg2__c;
- funcs->row_sad8 = row_sad8__c;
- funcs->col_sad8x8 = col_sad8x8__c;
funcs->sad8x8 = sad8x8__c;
funcs->sad8x8_thres = sad8x8_thres__c;
funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__c;
- funcs->intra8x8_err = intra8x8_err__c;
- funcs->inter8x8_err = inter8x8_err__c;
- funcs->inter8x8_err_xy2 = inter8x8_err_xy2__c;
}
void dsp_static_init(DspFunctions *funcs)
Modified: branches/theora-thusnelda/lib/enc/dsp.h
===================================================================
--- branches/theora-thusnelda/lib/enc/dsp.h 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/dsp.h 2008-12-05 05:40:36 UTC (rev 15556)
@@ -29,63 +29,49 @@
void (*set8x8) (unsigned char val, unsigned char *ptr,
ogg_uint32_t stride);
- void (*sub8x8) (unsigned char *FiltPtr, unsigned char *ReconPtr,
+ void (*sub8x8) (const unsigned char *FiltPtr, const unsigned char *ReconPtr,
ogg_int16_t *DctInputPtr, ogg_uint32_t stride);
- void (*sub8x8_128) (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
+ void (*sub8x8_128) (const unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
ogg_uint32_t stride);
- void (*sub8x8avg2) (unsigned char *FiltPtr, unsigned char *ReconPtr1,
- unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
+ void (*copy8x8) (const unsigned char *src, unsigned char *dest,
ogg_uint32_t stride);
-
- void (*copy8x8) (unsigned char *src, unsigned char *dest,
- ogg_uint32_t stride);
- void (*copy8x8_half) (unsigned char *src1, unsigned char *src2,
+ void (*copy8x8_half) (const unsigned char *src1, const unsigned char *src2,
unsigned char *dest, ogg_uint32_t stride);
- void (*recon8x8) (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
+ void (*recon8x8) (unsigned char *ReconPtr, const ogg_int16_t *ChangePtr,
ogg_uint32_t stride);
- void (*fdct_short) (ogg_int16_t *InputData, ogg_int16_t *OutputData);
+ void (*fdct_short) (const ogg_int16_t *InputData, ogg_int16_t *OutputData);
- ogg_uint32_t (*row_sad8) (unsigned char *Src1, unsigned char *Src2);
-
- ogg_uint32_t (*col_sad8x8) (unsigned char *Src1, unsigned char *Src2,
+ ogg_uint32_t (*sad8x8) (const unsigned char *ptr1, const unsigned char *ptr2,
ogg_uint32_t stride);
-
- ogg_uint32_t (*sad8x8) (unsigned char *ptr1, unsigned char *ptr2,
- ogg_uint32_t stride);
- ogg_uint32_t (*sad8x8_thres) (unsigned char *ptr1, unsigned char *ptr2,
+ ogg_uint32_t (*sad8x8_thres) (const unsigned char *ptr1, const unsigned char *ptr2,
ogg_uint32_t stride, ogg_uint32_t thres);
- ogg_uint32_t (*sad8x8_xy2_thres)(unsigned char *SrcData, unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t stride,
+ ogg_uint32_t (*sad8x8_xy2_thres)(const unsigned char *SrcData, const unsigned char *RefDataPtr1,
+ const unsigned char *RefDataPtr2, ogg_uint32_t stride,
ogg_uint32_t thres);
-
- ogg_uint32_t (*intra8x8_err) (unsigned char *DataPtr, ogg_uint32_t stride);
-
- ogg_uint32_t (*inter8x8_err) (unsigned char *SrcData, unsigned char *RefData,
- ogg_uint32_t stride);
-
- ogg_uint32_t (*inter8x8_err_xy2)(unsigned char *SrcData, unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t stride);
-
+
void (*LoopFilter) (CP_INSTANCE *cpi, int FLimit);
void (*FilterVert) (unsigned char * PixelPtr,
ogg_int32_t LineLength, ogg_int16_t *BoundingValuePtr);
- void (*IDctSlow) (ogg_int16_t *InputData,
- ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
+ void (*IDctSlow) (const ogg_int16_t *InputData,
+ const ogg_int16_t *QuantMatrix,
+ ogg_int16_t *OutputData);
- void (*IDct3) (ogg_int16_t *InputData,
- ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
+ void (*IDct3) (const ogg_int16_t *InputData,
+ const ogg_int16_t *QuantMatrix,
+ ogg_int16_t *OutputData);
- void (*IDct10) (ogg_int16_t *InputData,
- ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
+ void (*IDct10) (const ogg_int16_t *InputData,
+ const ogg_int16_t *QuantMatrix,
+ ogg_int16_t *OutputData);
} DspFunctions;
extern void dsp_dct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
@@ -114,8 +100,6 @@
#define dsp_sub8x8_128(funcs,a1,a2,a3) (funcs.sub8x8_128 (a1,a2,a3))
-#define dsp_sub8x8avg2(funcs,a1,a2,a3,a4,a5) (funcs.sub8x8avg2 (a1,a2,a3,a4,a5))
-
#define dsp_copy8x8(funcs,ptr1,ptr2,str1) (funcs.copy8x8 (ptr1,ptr2,str1))
#define dsp_copy8x8_half(funcs,ptr1,ptr2,ptr3,str1) (funcs.copy8x8_half (ptr1,ptr2,ptr3,str1))
@@ -124,10 +108,6 @@
#define dsp_fdct_short(funcs,in,out) (funcs.fdct_short (in,out))
-#define dsp_row_sad8(funcs,ptr1,ptr2) (funcs.row_sad8 (ptr1,ptr2))
-
-#define dsp_col_sad8x8(funcs,ptr1,ptr2,str1) (funcs.col_sad8x8 (ptr1,ptr2,str1))
-
#define dsp_sad8x8(funcs,ptr1,ptr2,str) (funcs.sad8x8 (ptr1,ptr2,str))
#define dsp_sad8x8_thres(funcs,ptr1,ptr2,str,t) (funcs.sad8x8_thres (ptr1,ptr2,str,t))
@@ -135,14 +115,6 @@
#define dsp_sad8x8_xy2_thres(funcs,ptr1,ptr2,ptr3,str,t) \
(funcs.sad8x8_xy2_thres (ptr1,ptr2,ptr3,str,t))
-#define dsp_intra8x8_err(funcs,ptr1,str1) (funcs.intra8x8_err (ptr1,str1))
-
-#define dsp_inter8x8_err(funcs,ptr1,ptr2,str) \
- (funcs.inter8x8_err (ptr1,ptr2,str))
-
-#define dsp_inter8x8_err_xy2(funcs,ptr1,ptr2,ptr3,str) \
- (funcs.inter8x8_err_xy2 (ptr1,ptr2,ptr3,str))
-
#define dsp_LoopFilter(funcs, ptr1, i) \
(funcs.LoopFilter(ptr1, i))
Modified: branches/theora-thusnelda/lib/enc/encoder_idct.c
===================================================================
--- branches/theora-thusnelda/lib/enc/encoder_idct.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/encoder_idct.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -111,9 +111,9 @@
}
*/
-static void dequant_slow( ogg_int16_t * dequant_coeffs,
- ogg_int16_t * quantized_list,
- ogg_int32_t * DCT_block) {
+static void dequant_slow( const ogg_int16_t * dequant_coeffs,
+ const ogg_int16_t * quantized_list,
+ ogg_int32_t * DCT_block) {
int i;
for(i=0;i<64;i++)
DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
@@ -121,9 +121,9 @@
-void IDctSlow__c( ogg_int16_t * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData ) {
+void IDctSlow__c( const ogg_int16_t * InputData,
+ const ogg_int16_t *QuantMatrix,
+ ogg_int16_t * OutputData ) {
ogg_int32_t IntermediateData[64];
ogg_int32_t * ip = IntermediateData;
ogg_int16_t * op = OutputData;
@@ -340,9 +340,9 @@
0 0 0 0 0 0 0 0
*************************/
-static void dequant_slow10( ogg_int16_t * dequant_coeffs,
- ogg_int16_t * quantized_list,
- ogg_int32_t * DCT_block){
+static void dequant_slow10( const ogg_int16_t * dequant_coeffs,
+ const ogg_int16_t * quantized_list,
+ ogg_int32_t * DCT_block){
int i;
memset(DCT_block,0, 128);
for(i=0;i<10;i++)
@@ -350,9 +350,9 @@
}
-void IDct10__c( ogg_int16_t * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData ){
+void IDct10__c( const ogg_int16_t * InputData,
+ const ogg_int16_t *QuantMatrix,
+ ogg_int16_t * OutputData ){
ogg_int32_t IntermediateData[64];
ogg_int32_t * ip = IntermediateData;
ogg_int16_t * op = OutputData;
@@ -542,18 +542,18 @@
0 0 0 0 0 0 0 0
**************************/
-void IDct1( ogg_int16_t * InputData,
- ogg_int16_t *QuantMatrix,
+void IDct1( const ogg_int16_t * InputData,
+ const ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ){
int loop;
ogg_int16_t OutD;
-
+
OutD=(ogg_int16_t) ((ogg_int32_t)(InputData[0]*QuantMatrix[0]+15)>>5);
-
+
for(loop=0;loop<64;loop++)
OutputData[loop]=OutD;
-
+
}
void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
Modified: branches/theora-thusnelda/lib/enc/encoder_quant.c
===================================================================
--- branches/theora-thusnelda/lib/enc/encoder_quant.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/encoder_quant.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -171,7 +171,7 @@
q=((ogg_uint32_t)qinfo->ac_scale[qi]*base[ci]/100)<<2;
q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
cpi->quant_tables[qti][pli][zigzag_index[ci]][qi]=(ogg_uint16_t)q;
- cpi->iquant_tables[qti][pli][qi][ci]=(ogg_int32_t)(((1<<31))/q+1);
+ cpi->iquant_tables[qti][pli][qi][zigzag_index[ci]]=(ogg_int32_t)(((1<<31))/q+1);
}
if(++qi>=qi_end)break;
Modified: branches/theora-thusnelda/lib/enc/encoder_toplevel.c
===================================================================
--- branches/theora-thusnelda/lib/enc/encoder_toplevel.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/encoder_toplevel.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -119,8 +119,8 @@
cpi->BaseQ = c->quality;
/* temporary while the RD code is only partially complete */
- cpi->skip_lambda=50;
- cpi->token_lambda=50;
+ cpi->skip_lambda=1000;
+ cpi->token_lambda=2000;
cpi->mv_lambda=0;
/* Set encoder flags. */
Modified: branches/theora-thusnelda/lib/enc/frinit.c
===================================================================
--- branches/theora-thusnelda/lib/enc/frinit.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/frinit.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -112,13 +112,11 @@
cpi->super_n[2] = cpi->super_h[2] * cpi->super_v[2];
cpi->super_total = cpi->super_n[0] + cpi->super_n[1] + cpi->super_n[2];
- /* +1; the last entry is the 'invalid' frag, which is always set to not coded as it doesn't really exist */
- cpi->frag_coded = calloc(cpi->frag_total+1, sizeof(*cpi->frag_coded));
+ cpi->frag_coded = calloc(cpi->frag_total, sizeof(*cpi->frag_coded));
cpi->frag_buffer_index = calloc(cpi->frag_total, sizeof(*cpi->frag_buffer_index));
cpi->frag_dc = calloc(cpi->frag_total, sizeof(*cpi->frag_dc));
- /* +1; the last entry is the 'invalid' mb, which contains only 'invalid' frags */
- cpi->macro = calloc(cpi->macro_total+1, sizeof(*cpi->macro));
+ cpi->macro = calloc(cpi->macro_total, sizeof(*cpi->macro));
cpi->super[0] = calloc(cpi->super_total, sizeof(**cpi->super));
cpi->super[1] = cpi->super[0] + cpi->super_n[0];
@@ -138,8 +136,8 @@
#ifdef COLLECT_METRICS
{
int i;
- cpi->frag_mbi = _ogg_calloc(cpi->frag_total+1, sizeof(*cpi->frag_mbi));
- cpi->frag_sad = _ogg_calloc(cpi->frag_total+1, sizeof(*cpi->frag_sad));
+ cpi->frag_mbi = _ogg_calloc(cpi->frag_total, sizeof(*cpi->frag_mbi));
+ cpi->frag_sad = _ogg_calloc(cpi->frag_total, sizeof(*cpi->frag_sad));
cpi->dct_token_frag_storage = _ogg_malloc(cpi->stack_offset*BLOCK_SIZE*sizeof(*cpi->dct_token_frag_storage));
cpi->dct_eob_fi_storage = _ogg_malloc(cpi->frag_total*BLOCK_SIZE*sizeof(*cpi->dct_eob_fi_storage));
}
@@ -169,7 +167,7 @@
int fragindex = frow*cpi->frag_h[plane] + fcol + offset;
cpi->super[plane][superindex].f[frag] = fragindex;
}else
- cpi->super[plane][superindex].f[frag] = cpi->frag_total; /* 'invalid' */
+ cpi->super[plane][superindex].f[frag] = -1; /* 'invalid' */
}
}
}
@@ -189,7 +187,7 @@
cpi->super[0][superindex].m[mb] = macroindex;
cpi->macro[macroindex].ysb = superindex;
}else
- cpi->super[0][superindex].m[mb] = cpi->macro_total;
+ cpi->super[0][superindex].m[mb] = -1;
}
}
}
@@ -207,7 +205,7 @@
cpi->super[1][superindex].m[mb] = macroindex;
cpi->macro[macroindex].usb = superindex + cpi->super_n[0];
}else
- cpi->super[1][superindex].m[mb] = cpi->macro_total;
+ cpi->super[1][superindex].m[mb] = -1;
}
}
}
@@ -225,7 +223,7 @@
cpi->super[2][superindex].m[mb] = macroindex;
cpi->macro[macroindex].vsb = superindex + cpi->super_n[0] + cpi->super_n[1];
}else
- cpi->super[2][superindex].m[mb] = cpi->macro_total;
+ cpi->super[2][superindex].m[mb] = -1;
}
}
}
@@ -253,8 +251,8 @@
int Rrow = baserow + ((frag>>1)&1);
int Rcol = basecol + (frag&1);
- cpi->macro[macroindex].Hyuv[0][frag] = cpi->frag_total; // default
- cpi->macro[macroindex].Ryuv[0][frag] = cpi->frag_total; //default
+ cpi->macro[macroindex].Hyuv[0][frag] = -1;
+ cpi->macro[macroindex].Ryuv[0][frag] = -1;
if(Hrow<cpi->frag_v[0] && Hcol<cpi->frag_h[0]){
cpi->macro[macroindex].Hyuv[0][frag] = Hrow*cpi->frag_h[0] + Hcol;
#ifdef COLLECT_METRICS
@@ -266,14 +264,14 @@
}
/* U */
- cpi->macro[macroindex].Ryuv[1][0] = cpi->frag_total;
- cpi->macro[macroindex].Ryuv[1][1] = cpi->frag_total;
- cpi->macro[macroindex].Ryuv[1][2] = cpi->frag_total;
- cpi->macro[macroindex].Ryuv[1][3] = cpi->frag_total;
- cpi->macro[macroindex].Hyuv[1][0] = cpi->frag_total;
- cpi->macro[macroindex].Hyuv[1][1] = cpi->frag_total;
- cpi->macro[macroindex].Hyuv[1][2] = cpi->frag_total;
- cpi->macro[macroindex].Hyuv[1][3] = cpi->frag_total;
+ cpi->macro[macroindex].Ryuv[1][0] = -1;
+ cpi->macro[macroindex].Ryuv[1][1] = -1;
+ cpi->macro[macroindex].Ryuv[1][2] = -1;
+ cpi->macro[macroindex].Ryuv[1][3] = -1;
+ cpi->macro[macroindex].Hyuv[1][0] = -1;
+ cpi->macro[macroindex].Hyuv[1][1] = -1;
+ cpi->macro[macroindex].Hyuv[1][2] = -1;
+ cpi->macro[macroindex].Hyuv[1][3] = -1;
if(row<cpi->frag_v[1] && col<cpi->frag_h[1]){
cpi->macro[macroindex].Hyuv[1][0] = cpi->frag_n[0] + macroindex;
cpi->macro[macroindex].Ryuv[1][0] = cpi->frag_n[0] + macroindex;
@@ -283,14 +281,14 @@
}
/* V */
- cpi->macro[macroindex].Ryuv[2][0] = cpi->frag_total;
- cpi->macro[macroindex].Ryuv[2][1] = cpi->frag_total;
- cpi->macro[macroindex].Ryuv[2][2] = cpi->frag_total;
- cpi->macro[macroindex].Ryuv[2][3] = cpi->frag_total;
- cpi->macro[macroindex].Hyuv[2][0] = cpi->frag_total;
- cpi->macro[macroindex].Hyuv[2][1] = cpi->frag_total;
- cpi->macro[macroindex].Hyuv[2][2] = cpi->frag_total;
- cpi->macro[macroindex].Hyuv[2][3] = cpi->frag_total;
+ cpi->macro[macroindex].Ryuv[2][0] = -1;
+ cpi->macro[macroindex].Ryuv[2][1] = -1;
+ cpi->macro[macroindex].Ryuv[2][2] = -1;
+ cpi->macro[macroindex].Ryuv[2][3] = -1;
+ cpi->macro[macroindex].Hyuv[2][0] = -1;
+ cpi->macro[macroindex].Hyuv[2][1] = -1;
+ cpi->macro[macroindex].Hyuv[2][2] = -1;
+ cpi->macro[macroindex].Hyuv[2][3] = -1;
if(row<cpi->frag_v[2] && col<cpi->frag_h[2]){
cpi->macro[macroindex].Hyuv[2][0] = cpi->frag_n[0] + cpi->frag_n[1] + macroindex;
cpi->macro[macroindex].Ryuv[2][0] = cpi->frag_n[0] + cpi->frag_n[1] + macroindex;
@@ -374,21 +372,6 @@
}
}
- /* fill in 'invalid' macroblock */
- {
- int p,f;
- for(p=0;p<3;p++)
- for(f=0;f<4;f++){
- cpi->macro[cpi->macro_total].Ryuv[p][f] = cpi->frag_total;
- cpi->macro[cpi->macro_total].Hyuv[p][f] = cpi->frag_total;
- }
- cpi->macro[cpi->macro_total].ncneighbors=0;
- cpi->macro[cpi->macro_total].npneighbors=0;
-#ifdef COLLECT_METRICS
- cpi->frag_mbi[cpi->frag_total] = cpi->macro_total;
-#endif
- }
-
/* allocate frames */
cpi->frame = _ogg_calloc(cpi->frame_size,sizeof(*cpi->frame));
cpi->lastrecon = _ogg_calloc(cpi->frame_size,sizeof(*cpi->lastrecon));
Modified: branches/theora-thusnelda/lib/enc/mcenc.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mcenc.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/mcenc.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -207,7 +207,6 @@
const unsigned char *ref = (_goldenp ? cpi->golden : cpi->lastrecon) + base_offset;
err+= dsp_sad8x8_xy2_thres (cpi->dsp, cur, ref+_mvoffset0, ref+_mvoffset1, cpi->stride[0], _best_err-err);
- //err+= dsp_inter8x8_err_xy2 (cpi->dsp, cur, ref+_mvoffset0, ref+_mvoffset1, cpi->stride[0]);
}
@@ -231,13 +230,12 @@
err=0;
for(bi=0;bi<4;bi++){
int fi = mb->Ryuv[0][bi];
- if(fi < cpi->frag_total){ /* last fragment is the 'invalid fragment' */
+ if(fi >= 0){ /* a negative index marks an 'invalid' (nonexistent) fragment */
ogg_uint32_t base_offset = cpi->frag_buffer_index[fi];
const unsigned char *cur = cpi->frame + base_offset;
const unsigned char *ref = (_goldenp ? cpi->golden : cpi->lastrecon) + base_offset;
_block_err[bi] = dsp_sad8x8_thres (cpi->dsp, cur, ref+mvoffset,stride,9999999);
- //_block_err[bi] = dsp_inter8x8_err (cpi->dsp, cur, ref+mvoffset,stride);
err += _block_err[bi];
}
@@ -310,7 +308,7 @@
int err;
int fi = mb->Ryuv[0][_bi];
- if(fi == cpi->frag_total) return _best_err;
+ if(fi < 0) return _best_err;
mvoffset_base=_vec->x+_vec->y*stride;
offset_y[0]=offset_y[1]=offset_y[2]=-stride;
@@ -342,7 +340,6 @@
mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
err=dsp_sad8x8_xy2_thres (cpi->dsp, cur, ref+mvoffset0, ref+mvoffset1, stride, _best_err);
- //err=dsp_inter8x8_err_xy2 (cpi->dsp, cur, ref+mvoffset0, ref+mvoffset1, stride);
if(err<_best_err){
_best_err=err;
Modified: branches/theora-thusnelda/lib/enc/mode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mode.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/mode.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -71,7 +71,6 @@
*/
-#include<stdio.h>
void oc_mode_scheme_chooser_init(CP_INSTANCE *cpi){
oc_mode_scheme_chooser *chooser = &cpi->chooser;
int i;
@@ -275,22 +274,27 @@
static int BInterSAD(CP_INSTANCE *cpi, int fi, int plane, int goldenp, mv_t mv){
int sad = 0;
unsigned char *b = cpi->frame + cpi->frag_buffer_index[fi];
- int qp = (plane>0);
- int mx = mvmap[qp][mv.x+31];
- int my = mvmap[qp][mv.y+31];
- int mx2 = mvmap2[qp][mv.x+31];
- int my2 = mvmap2[qp][mv.y+31];
-
- int stride = cpi->stride[plane];
unsigned char *r = (goldenp ? cpi->golden : cpi->lastrecon ) +
- cpi->frag_buffer_index[fi] + my * stride + mx;
-
- if(mx2 || my2){
- unsigned char *r2 = r + my2 * stride + mx2;
- sad = dsp_sad8x8_xy2_thres (cpi->dsp, b, r, r2, stride, 9999999);
- }else{
+ cpi->frag_buffer_index[fi];
+ int stride = cpi->stride[plane];
+
+ if(mv.x || mv.y){
+ int qp = (plane>0);
+ int mx = mvmap[qp][mv.x+31];
+ int my = mvmap[qp][mv.y+31];
+ int mx2 = mvmap2[qp][mv.x+31];
+ int my2 = mvmap2[qp][mv.y+31];
+
+ r += my * stride + mx;
+
+ if(mx2 || my2){
+ unsigned char *r2 = r + my2 * stride + mx2;
+ sad = dsp_sad8x8_xy2_thres (cpi->dsp, b, r, r2, stride, 9999999);
+ }else{
+ sad = dsp_sad8x8 (cpi->dsp, b, r, stride);
+ }
+ }else
sad = dsp_sad8x8 (cpi->dsp, b, r, stride);
- }
if(plane)
return sad<<2;
@@ -305,7 +309,7 @@
for(i=0;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
- if(fi<cpi->frag_total){
+ if(fi>=0){
int sad = BIntraSAD(cpi,fi,i);
cost += BINMAP(mode_rate[qi][i][1],sad);
}
@@ -324,7 +328,7 @@
for(i=0;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
- if(fi<cpi->frag_total){
+ if(fi>=0){
int sad = BInterSAD(cpi,fi,i,mode==CODE_USING_GOLDEN,mv);
cost += BINMAP(mode_rate[qi][i][0],sad);
}
@@ -341,7 +345,7 @@
for(i=0;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
- if(fi<cpi->frag_total){
+ if(fi>=0){
int bi = cpi->frag_buffer_index[fi];
int stride = cpi->stride[i];
int sad = dsp_sad8x8 (cpi->dsp, cpi->frame+bi, cpi->lastrecon+bi, stride);
@@ -363,7 +367,7 @@
for(i=0;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
- if(fi<cpi->frag_total){
+ if(fi>=0){
int sad = BInterSAD(cpi,fi,i,golden,mb->analysis_mv[0][golden]);
cost += BINMAP(mode_rate[qi][i][0],sad);
}
@@ -391,7 +395,7 @@
for(j=0;j<4;j++){
int fi=mb->Ryuv[0][j];
- if(fi<cpi->frag_total){
+ if(fi>=0){
int sad = BInterSAD(cpi,fi,0,0,mb->mv[j]);
cost += BINMAP(mode_rate[qi][0][0],sad);
@@ -415,7 +419,7 @@
for(i=1;i<3;i++){
int fi=mb->Ryuv[i][0];
- if(fi<cpi->frag_total){
+ if(fi>=0){
int sad = BInterSAD(cpi,fi,i,0,ch);
cost += BINMAP(mode_rate[qi][i][0],sad);
}
@@ -440,7 +444,7 @@
for(i=1;i<3;i++){
for(j=0;j<2;j++){
int fi=mb->Ryuv[i][j];
- if(fi<cpi->frag_total){
+ if(fi>=0){
int sad = BInterSAD(cpi,fi,i,0,mv[j]);
cost += BINMAP(mode_rate[qi][i][0],sad);
}
@@ -453,7 +457,7 @@
for(i=1;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
- if(fi<cpi->frag_total){
+ if(fi>=0){
int sad = BInterSAD(cpi,fi,i,0,mb->mv[j]);
cost += BINMAP(mode_rate[qi][i][0],sad);
}
@@ -527,29 +531,38 @@
int coding_overhead, rd_metric_t *mo, long *rho_count,
token_checkpoint_t **stack){
- int keyframe = (cpi->FrameType == KEY_FRAME);
- int qi = ps->qi;
- ogg_int32_t *iq = ps->iq[mode != CODE_INTRA];
+ const int keyframe = (cpi->FrameType == KEY_FRAME);
+ const int qi = ps->qi;
+ const ogg_int32_t *iq = ps->iq[mode != CODE_INTRA];
ogg_int16_t buffer[64];
ogg_int16_t data[64];
- int bi = cpi->frag_buffer_index[fi];
- int stride = cpi->stride[ps->plane];
- unsigned char *frame_ptr = &cpi->frame[bi];
+ const int bi = cpi->frag_buffer_index[fi];
+ const int stride = cpi->stride[ps->plane];
+ const unsigned char *frame_ptr = &cpi->frame[bi];
unsigned char *lastrecon = ((mode == CODE_USING_GOLDEN ||
mode == CODE_GOLDEN_MV) ?
cpi->golden : cpi->lastrecon)+bi;
unsigned char *thisrecon = cpi->recon+bi;
- int nonzero=63;
- ogg_int16_t *dequant = ps->re_q[mode != CODE_INTRA][ps->plane];
- int uncoded_ssd=0,coded_ssd=0,coded_partial_ssd=0,sad=0;
+ int nonzero=0;
+ const ogg_int16_t *dequant = ps->re_q[mode != CODE_INTRA][ps->plane];
+ int uncoded_ssd=0,coded_ssd=0,coded_partial_ssd=0;
int uncoded_dc=0,coded_dc=0,dc_flag=0;
int lambda = cpi->skip_lambda;
token_checkpoint_t *checkpoint=*stack;
- int sad_cost=0,cost;
+ int cost;
int i;
cpi->frag_coded[fi]=1;
+ /* by way of explanation: although the f_array coding overhead
+ determination is accurate, it is greedy using very coarse-grained
+ local information. Allowing it to mildly discourage coding turns
+ out to be beneficial, but it's not clear that allowing it to
+ encourage coding through negative coding overhead deltas is
+ useful. For that reason, we disallow negative
+ coding_overheads */
+ if(coding_overhead<0)coding_overhead = 0;
+
/* motion comp */
switch(mode){
case CODE_INTER_PLUS_MV:
@@ -588,14 +601,14 @@
break;
}
+#ifdef COLLECT_METRICS
+ int sad=0;
if(mode==CODE_INTRA){
int acc=0;
- for(i=0;i<64;i++)
+ for(i=0;i<64;i++){
acc += data[i];
-
- for(i=0;i<64;i++)
sad += abs((data[i]<<6)-acc);
-
+ }
sad >>=6;
}else{
for(i=0;i<64;i++)
@@ -604,7 +617,6 @@
if(ps->plane)sad<<=2;
}
-#ifdef COLLECT_METRICS
cpi->frag_sad[fi]=sad;
#endif
@@ -623,7 +635,6 @@
}
uncoded_ssd*=ps->ssdmul;
uncoded_ssd <<= 4; /* scale to match DCT domain */
- sad_cost = BINMAP(mode_rate[qi][ps->plane][mode==CODE_INTRA],sad);
}
/* transform */
@@ -634,34 +645,9 @@
int i;
//quant_tables *qq = ps->qq[mode != CODE_INTRA];
- {
- int d;
-
- // rho-domain distribution
+ for(i=0;i<64;i++){
+ int v = buffer[dezigzag_index[i]];
//int pos;
- //int val = (abs(buffer[0])<<dcshift);
- //ogg_int16_t *qqq = (*qq)[0];
- //for(pos=64;pos>0;pos--)
- //if(val < qqq[pos-1])break;
-
- //rho_count[pos]++;
-
- if(abs(buffer[0])>=dequant[0]){
- int val = (((iq[0]>>15)*buffer[0]) + (1<<15) + (((iq[0]&0x7fff)*buffer[0])>>15)) >>16;
- val = (val>511?511:(val<-511?-511:val));
-
- d = val*dequant[0]-buffer[0];
- coded_partial_ssd += d*d;
- data[0] = val;
- }else{
- coded_partial_ssd += buffer[0]*buffer[0];
- data[0] = 0;
- }
- }
-
- for(i=1;i<64;i++){
- int ii = dezigzag_index[i];
- //int pos;
//int val = abs(buffer[ii])<<1;
//ogg_int16_t *qqq = (*qq)[i];
//for(pos=64;pos>0;pos--)
@@ -670,15 +656,18 @@
/* rho-domain distribution */
//rho_count[pos]++;
- {
+ if((abs(v)<<1)>=dequant[i]){
int d;
- int val = (((iq[ii]>>15)*buffer[ii]) + (1<<15) + (((iq[ii]&0x7fff)*buffer[ii])>>15)) >>16;
+ int val = (((iq[i]>>15)*v) + (1<<15) + (((iq[i]&0x7fff)*v)>>15)) >>16;
val = (val>511?511:(val<-511?-511:val));
-
- d = val*dequant[i]-buffer[ii];
+ d = val*dequant[i]-v;
coded_partial_ssd += d*d;
data[i] = val;
+ nonzero=i;
+ }else{
+ coded_partial_ssd += v*v;
+ data[i] = 0;
}
}
@@ -688,44 +677,10 @@
}
cpi->frag_dc[fi] = data[0];
-#if 0
- /* small performance short-circuit:
-
- Because roundoff error means that C2 preservation can't really be
- trusted at low energy levels (and Theora's intentionally leaky
- fDCT makes this way way worse), we shouldn't reply on SSD
- gathered in the frequency domain. We can still use it if we
- expect it to be... off... especially at low energies.
-
- If the partial_ssd indicates this block is not worth the bits by
- some large margin, don't proceed or bother to get a more precise
- determination */
-
- if(!keyframe){
-
- /* Don't short circuit if there's a chance of coding a DC component */
- if( (mode != CODE_INTRA && data[0]==0) ||
- (mode == CODE_INTRA && abs( buffer[0] - (uncoded_dc>>1) + 4096 ) < (dequant[0]>>1))){
-
- /* the partial_ssd underreports distortion, so this comparison
- will only yield false negatives, which are harmless */
- if(uncoded_ssd <= coded_partial_ssd+coding_overhead*lambda+((sad_cost*lambda)>>OC_BIT_SCALE)){
- /* SKIP */
-
- uncode_frag(cpi,fi,ps->plane);
- mo->coded_ssd+=uncoded_ssd; /* We may still be coding the MB even if not this block */
- return 0;
-
- }
- }
- }
-#endif
-
/* tokenize */
cost = dct_tokenize_AC(cpi, fi, data, dequant, buffer, fi>=cpi->frag_n[0], stack);
/* reconstruct */
- while(!data[nonzero] && --nonzero);
switch(nonzero){
case 0:
IDct1( data, dequant, buffer );
@@ -909,7 +864,7 @@
int fi = sb->f[i];
int mb_phase;
- if(fi<cpi->frag_total){
+ if(fi>=0){
token_checkpoint_t *stackptr = stack;
macroblock_t *mb = &cpi->macro[sb->m[i]];
mv_t mv;
@@ -999,7 +954,7 @@
for(j = 0; j<4; j++){ /* mode addressing is through Y plane, always 4 MB per SB */
int mbi = sb->m[j];
- if(mbi >= cpi->macro_total) continue;
+ if(mbi < 0) continue;
int cost[8] = {0,0,0,0, 0,0,0,0};
int overhead[8] = {0,0,0,0, 0,0,0,0};
Modified: branches/theora-thusnelda/lib/enc/reconstruct.c
===================================================================
--- branches/theora-thusnelda/lib/enc/reconstruct.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/reconstruct.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -17,7 +17,7 @@
#include "codec_internal.h"
-static void copy8x8__c (unsigned char *src,
+static void copy8x8__c (const unsigned char *src,
unsigned char *dest,
unsigned int stride)
{
@@ -30,8 +30,8 @@
}
}
-static void copy8x8_half__c (unsigned char *src1,
- unsigned char *src2,
+static void copy8x8_half__c (const unsigned char *src1,
+ const unsigned char *src2,
unsigned char *dest,
unsigned int stride)
{
@@ -55,7 +55,7 @@
static void recon8x8__c (unsigned char *ReconPtr,
- ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
+ const ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
{
ogg_uint32_t i;
Modified: branches/theora-thusnelda/lib/enc/x86_64/dsp_mmx.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86_64/dsp_mmx.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/x86_64/dsp_mmx.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -22,15 +22,13 @@
#if defined(USE_ASM)
-typedef unsigned long long ogg_uint64_t;
-
static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x0080008000800080LL;
#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
-static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
+static void sub8x8__mmx (const unsigned char *FiltPtr, const unsigned char *ReconPtr,
ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine)
{
__asm__ __volatile__ (
@@ -62,15 +60,15 @@
: "+r" (FiltPtr),
"+r" (ReconPtr),
"+r" (DctInputPtr)
- : "r" ((ogg_uint64_t)PixelsPerLine)
+
+ : "r" ((unsigned long)PixelsPerLine)
: "memory"
);
}
-static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
+static void sub8x8_128__mmx (const unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
ogg_uint32_t PixelsPerLine)
{
- ogg_uint64_t ppl = PixelsPerLine;
__asm__ __volatile__ (
" .balign 16 \n\t"
@@ -96,188 +94,12 @@
: "+r" (FiltPtr),
"+r" (DctInputPtr)
- : "r" (ppl), /* gcc bug? a cast won't work here, e.g. (ogg_uint64_t)PixelsPerLine */
+ : "r" ((unsigned long)PixelsPerLine),
[V128] "m" (V128)
: "memory"
);
}
-static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
- unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine)
-{
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm7, %%mm7 \n\t"
-
- ".rept 8 \n\t"
- " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */
- " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr1 */
- " movq (%2), %%mm4 \n\t" /* mm1 = ReconPtr2 */
- " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */
- " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */
- " movq %%mm4, %%mm5 \n\t" /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */
- " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr1) */
- " punpcklbw %%mm7, %%mm4 \n\t" /* mm1 = INT16(ReconPtr2) */
- " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */
- " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr1) */
- " punpckhbw %%mm7, %%mm5 \n\t" /* mm3 = INT16(ReconPtr2) */
- /* average ReconPtr1 and ReconPtr2 */
- " paddw %%mm4, %%mm1 \n\t" /* mm1 = ReconPtr1 + ReconPtr2 */
- " paddw %%mm5, %%mm3 \n\t" /* mm3 = ReconPtr1 + ReconPtr2 */
- " psrlw $1, %%mm1 \n\t" /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
- " psrlw $1, %%mm3 \n\t" /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
- " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- " movq %%mm0, (%3) \n\t" /* write answer out */
- " movq %%mm2, 8(%3) \n\t" /* write answer out */
- /* Increment pointers */
- " add $16, %3 \n\t"
- " add %4, %0 \n\t"
- " add %4, %1 \n\t"
- " add %4, %2 \n\t"
- ".endr \n\t"
-
- : "+r" (FiltPtr),
- "+r" (ReconPtr1),
- "+r" (ReconPtr2),
- "+r" (DctInputPtr)
- : "r" ((ogg_uint64_t)PixelsPerLine)
- : "memory"
- );
-}
-
-static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
-{
- ogg_uint64_t XSum;
- ogg_uint64_t XXSum;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm5, %%mm5 \n\t"
- " pxor %%mm6, %%mm6 \n\t"
- " pxor %%mm7, %%mm7 \n\t"
- " mov $8, %%rdi \n\t"
- "1: \n\t"
- " movq (%2), %%mm0 \n\t" /* take 8 bytes */
- " movq %%mm0, %%mm2 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t"
- " punpckhbw %%mm6, %%mm2 \n\t"
-
- " paddw %%mm0, %%mm5 \n\t"
- " paddw %%mm2, %%mm5 \n\t"
-
- " pmaddwd %%mm0, %%mm0 \n\t"
- " pmaddwd %%mm2, %%mm2 \n\t"
-
- " paddd %%mm0, %%mm7 \n\t"
- " paddd %%mm2, %%mm7 \n\t"
-
- " add %3, %2 \n\t" /* Inc pointer into src data */
-
- " dec %%rdi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $32, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $16, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movd %%mm5, %%rdi \n\t"
- " movsx %%di, %%rdi \n\t"
- " mov %%rdi, %0 \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddd %%mm0, %%mm7 \n\t"
- " movd %%mm7, %1 \n\t"
-
- : "=r" (XSum),
- "=r" (XXSum),
- "+r" (DataPtr)
- : "r" ((ogg_uint64_t)Stride)
- : "rdi", "memory"
- );
-
- /* Compute population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ) );
-}
-
-static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, unsigned char *RefDataPtr,
- ogg_uint32_t Stride)
-{
- ogg_uint64_t XSum;
- ogg_uint64_t XXSum;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm5, %%mm5 \n\t"
- " pxor %%mm6, %%mm6 \n\t"
- " pxor %%mm7, %%mm7 \n\t"
- " mov $8, %%rdi \n\t"
- "1: \n\t"
- " movq (%2), %%mm0 \n\t" /* take 8 bytes */
- " movq (%3), %%mm1 \n\t"
- " movq %%mm0, %%mm2 \n\t"
- " movq %%mm1, %%mm3 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t"
- " punpcklbw %%mm6, %%mm1 \n\t"
- " punpckhbw %%mm6, %%mm2 \n\t"
- " punpckhbw %%mm6, %%mm3 \n\t"
-
- " psubsw %%mm1, %%mm0 \n\t"
- " psubsw %%mm3, %%mm2 \n\t"
-
- " paddw %%mm0, %%mm5 \n\t"
- " paddw %%mm2, %%mm5 \n\t"
-
- " pmaddwd %%mm0, %%mm0 \n\t"
- " pmaddwd %%mm2, %%mm2 \n\t"
-
- " paddd %%mm0, %%mm7 \n\t"
- " paddd %%mm2, %%mm7 \n\t"
-
- " add %4, %2 \n\t" /* Inc pointer into src data */
- " add %4, %3 \n\t" /* Inc pointer into ref data */
-
- " dec %%rdi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $32, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $16, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movd %%mm5, %%rdi \n\t"
- " movsx %%di, %%rdi \n\t"
- " mov %%rdi, %0 \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddd %%mm0, %%mm7 \n\t"
- " movd %%mm7, %1 \n\t"
-
- : "=m" (XSum),
- "=m" (XXSum),
- "+r" (SrcData),
- "+r" (RefDataPtr)
- : "r" ((ogg_uint64_t)Stride)
- : "rdi", "memory"
- );
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
static void restore_fpu (void)
{
__asm__ __volatile__ (
@@ -290,9 +112,6 @@
funcs->restore_fpu = restore_fpu;
funcs->sub8x8 = sub8x8__mmx;
funcs->sub8x8_128 = sub8x8_128__mmx;
- funcs->sub8x8avg2 = sub8x8avg2__mmx;
- funcs->intra8x8_err = intra8x8_err__mmx;
- funcs->inter8x8_err = inter8x8_err__mmx;
}
#endif /* USE_ASM */
Modified: branches/theora-thusnelda/lib/enc/x86_64/dsp_mmxext.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86_64/dsp_mmxext.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/x86_64/dsp_mmxext.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -22,9 +22,7 @@
#if defined(USE_ASM)
-typedef unsigned long long ogg_uint64_t;
-
-static ogg_uint32_t sad8x8__mmxext (unsigned char *ptr1, unsigned char *ptr2,
+static ogg_uint32_t sad8x8__mmxext (const unsigned char *ptr1, const unsigned char *ptr2,
ogg_uint32_t stride)
{
ogg_uint32_t DiffVal;
@@ -51,14 +49,14 @@
: "=r" (DiffVal),
"+r" (ptr1),
"+r" (ptr2)
- : "r" ((ogg_uint64_t)stride)
+ : "r" ((unsigned long)stride)
: "memory"
);
return DiffVal;
}
-static ogg_uint32_t sad8x8_thres__mmxext (unsigned char *ptr1, unsigned char *ptr2,
+static ogg_uint32_t sad8x8_thres__mmxext (const unsigned char *ptr1, const unsigned char *ptr2,
ogg_uint32_t stride, ogg_uint32_t thres)
{
ogg_uint32_t DiffVal;
@@ -81,15 +79,15 @@
: "=r" (DiffVal),
"+r" (ptr1),
"+r" (ptr2)
- : "r" ((ogg_uint64_t)stride)
+ : "r" ((unsigned long)stride)
: "memory"
);
return DiffVal;
}
-static ogg_uint32_t sad8x8_xy2_thres__mmxext (unsigned char *SrcData, unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t Stride,
+static ogg_uint32_t sad8x8_xy2_thres__mmxext (const unsigned char *SrcData, const unsigned char *RefDataPtr1,
+ const unsigned char *RefDataPtr2, ogg_uint32_t Stride,
ogg_uint32_t thres)
{
ogg_uint32_t DiffVal;
@@ -115,202 +113,18 @@
"+r" (SrcData),
"+r" (RefDataPtr1),
"+r" (RefDataPtr2)
- : "r" ((ogg_uint64_t)Stride)
+ : "r" ((unsigned long)Stride)
: "memory"
);
return DiffVal;
}
-static ogg_uint32_t row_sad8__mmxext (unsigned char *Src1, unsigned char *Src2)
-{
- ogg_uint32_t MaxSad;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " movd (%1), %%mm0 \n\t"
- " movd (%2), %%mm1 \n\t"
- " psadbw %%mm0, %%mm1 \n\t"
- " movd 4(%1), %%mm2 \n\t"
- " movd 4(%2), %%mm3 \n\t"
- " psadbw %%mm2, %%mm3 \n\t"
-
- " pmaxsw %%mm1, %%mm3 \n\t"
- " movd %%mm3, %0 \n\t"
- " andl $0xffff, %0 \n\t"
-
- : "=m" (MaxSad),
- "+r" (Src1),
- "+r" (Src2)
- :
- : "memory"
- );
-
- return MaxSad;
-}
-
-static ogg_uint32_t col_sad8x8__mmxext (unsigned char *Src1, unsigned char *Src2,
- ogg_uint32_t stride)
-{
- ogg_uint32_t MaxSad;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm3, %%mm3 \n\t" /* zero out mm3 for unpack */
- " pxor %%mm4, %%mm4 \n\t" /* mm4 low sum */
- " pxor %%mm5, %%mm5 \n\t" /* mm5 high sum */
- " pxor %%mm6, %%mm6 \n\t" /* mm6 low sum */
- " pxor %%mm7, %%mm7 \n\t" /* mm7 high sum */
- " mov $4, %%rdi \n\t" /* 4 rows */
- "1: \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t" /* take 8 bytes */
-
- " movq %%mm0, %%mm2 \n\t"
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */
- " movq %%mm0, %%mm1 \n\t"
-
- " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */
- " paddw %%mm0, %%mm4 \n\t" /* accumulate difference... */
- " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */
- " paddw %%mm1, %%mm5 \n\t" /* accumulate difference... */
- " add %3, %1 \n\t" /* Inc pointer into the new data */
- " add %3, %2 \n\t" /* Inc pointer into the new data */
-
- " dec %%rdi \n\t"
- " jnz 1b \n\t"
-
- " mov $4, %%rdi \n\t" /* 4 rows */
- "2: \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t" /* take 8 bytes */
-
- " movq %%mm0, %%mm2 \n\t"
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */
- " movq %%mm0, %%mm1 \n\t"
-
- " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */
- " paddw %%mm0, %%mm6 \n\t" /* accumulate difference... */
- " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */
- " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... */
- " add %3, %1 \n\t" /* Inc pointer into the new data */
- " add %3, %2 \n\t" /* Inc pointer into the new data */
-
- " dec %%rdi \n\t"
- " jnz 2b \n\t"
-
- " pmaxsw %%mm6, %%mm7 \n\t"
- " pmaxsw %%mm4, %%mm5 \n\t"
- " pmaxsw %%mm5, %%mm7 \n\t"
- " movq %%mm7, %%mm6 \n\t"
- " psrlq $32, %%mm6 \n\t"
- " pmaxsw %%mm6, %%mm7 \n\t"
- " movq %%mm7, %%mm6 \n\t"
- " psrlq $16, %%mm6 \n\t"
- " pmaxsw %%mm6, %%mm7 \n\t"
- " movd %%mm7, %0 \n\t"
- " andl $0xffff, %0 \n\t"
-
- : "=r" (MaxSad),
- "+r" (Src1),
- "+r" (Src2)
- : "r" ((ogg_uint64_t)stride)
- : "memory", "rdi"
- );
-
- return MaxSad;
-}
-
-static ogg_uint32_t inter8x8_err_xy2__mmxext (unsigned char *SrcData, unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t Stride)
-{
- ogg_uint64_t XSum;
- ogg_uint64_t XXSum;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm4, %%mm4 \n\t"
- " pxor %%mm5, %%mm5 \n\t"
- " pxor %%mm6, %%mm6 \n\t"
- " pxor %%mm7, %%mm7 \n\t"
- " mov $8, %%rdi \n\t"
- "1: \n\t"
- " movq (%2), %%mm0 \n\t" /* take 8 bytes */
-
- " movq (%3), %%mm2 \n\t"
- " movq (%4), %%mm1 \n\t" /* take average of mm2 and mm1 */
- " pavgb %%mm2, %%mm1 \n\t"
-
- " movq %%mm0, %%mm2 \n\t"
- " movq %%mm1, %%mm3 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t"
- " punpcklbw %%mm4, %%mm1 \n\t"
- " punpckhbw %%mm6, %%mm2 \n\t"
- " punpckhbw %%mm4, %%mm3 \n\t"
-
- " psubsw %%mm1, %%mm0 \n\t"
- " psubsw %%mm3, %%mm2 \n\t"
-
- " paddw %%mm0, %%mm5 \n\t"
- " paddw %%mm2, %%mm5 \n\t"
-
- " pmaddwd %%mm0, %%mm0 \n\t"
- " pmaddwd %%mm2, %%mm2 \n\t"
-
- " paddd %%mm0, %%mm7 \n\t"
- " paddd %%mm2, %%mm7 \n\t"
-
- " add %5, %2 \n\t" /* Inc pointer into src data */
- " add %5, %3 \n\t" /* Inc pointer into ref data */
- " add %5, %4 \n\t" /* Inc pointer into ref data */
-
- " dec %%rdi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $32, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $16, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movd %%mm5, %%edi \n\t"
- " movsx %%di, %%edi \n\t"
- " movl %%edi, %0 \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddd %%mm0, %%mm7 \n\t"
- " movd %%mm7, %1 \n\t"
-
- : "=m" (XSum),
- "=m" (XXSum),
- "+r" (SrcData),
- "+r" (RefDataPtr1),
- "+r" (RefDataPtr2)
- : "r" ((ogg_uint64_t)Stride)
- : "rdi", "memory"
- );
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
void dsp_mmxext_init(DspFunctions *funcs)
{
- funcs->row_sad8 = row_sad8__mmxext;
- funcs->col_sad8x8 = col_sad8x8__mmxext;
funcs->sad8x8 = sad8x8__mmxext;
funcs->sad8x8_thres = sad8x8_thres__mmxext;
funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmxext;
- funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmxext;
}
#endif /* USE_ASM */
Modified: branches/theora-thusnelda/lib/enc/x86_64/fdct_mmx.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86_64/fdct_mmx.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/x86_64/fdct_mmx.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -27,13 +27,6 @@
static const __attribute__ ((aligned(8),used)) ogg_int64_t xC6S2 = 0x061f861f861f861f8LL;
static const __attribute__ ((aligned(8),used)) ogg_int64_t xC7S1 = 0x031f131f131f131f1LL;
-#if defined(__MINGW32__) || defined(__CYGWIN__) || \
- defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__))
-# define M(a) "_" #a
-#else
-# define M(a) #a
-#endif
-
/* execute stage 1 of forward DCT */
#define Fdct_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7,temp) \
" movq " #ip0 ", %%mm0 \n\t" \
Modified: branches/theora-thusnelda/lib/enc/x86_64/idct_mmx.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86_64/idct_mmx.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/x86_64/idct_mmx.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -356,8 +356,8 @@
*
***************************************************************************************
*/
-void IDctSlow__mmx(ogg_int16_t *in,
- ogg_int16_t *q,
+void IDctSlow__mmx(const ogg_int16_t *in,
+ const ogg_int16_t *q,
ogg_int16_t *out ) {
# define MID(M,I) MtoSTR(M+(I)*8)"(%[c])"
@@ -810,9 +810,9 @@
/* --------------------------------------------------------------- */
/* IDCT 10 */
-void IDct10__mmx( ogg_int16_t *in,
- ogg_int16_t *q,
- ogg_int16_t *out ) {
+void IDct10__mmx( const ogg_int16_t *in,
+ const ogg_int16_t *q,
+ ogg_int16_t *out ) {
__asm__ __volatile__ (
@@ -1063,9 +1063,9 @@
"#end ColumnIDCT_3\n"
//End of ColumnIDCT_3
-void IDct3__mmx( ogg_int16_t *in,
- ogg_int16_t *q,
- ogg_int16_t *out ) {
+void IDct3__mmx( const ogg_int16_t *in,
+ const ogg_int16_t *q,
+ ogg_int16_t *out ) {
__asm__ __volatile__ (
Modified: branches/theora-thusnelda/lib/enc/x86_64/recon_mmx.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86_64/recon_mmx.c 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/enc/x86_64/recon_mmx.c 2008-12-05 05:40:36 UTC (rev 15556)
@@ -19,11 +19,9 @@
#if defined(USE_ASM)
-typedef unsigned long long ogg_uint64_t;
-
static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x8080808080808080LL;
-static void copy8x8__mmx (unsigned char *src,
+static void copy8x8__mmx (const unsigned char *src,
unsigned char *dest,
ogg_uint32_t stride)
{
@@ -57,13 +55,13 @@
" movq %%mm3, (%0, %%rdi) \n\t"
: "+a" (dest)
: "c" (src),
- "d" ((ogg_uint64_t)stride)
+ "d" ((unsigned long)stride)
: "memory", "rdi"
);
}
static void recon8x8__mmx (unsigned char *ReconPtr,
- ogg_int16_t *ChangePtr,
+ const ogg_int16_t *ChangePtr,
ogg_uint32_t LineStep)
{
__asm__ __volatile__ (
@@ -93,7 +91,7 @@
" jc 1b \n\t"
: "+r" (ReconPtr)
: "r" (ChangePtr),
- "r" ((ogg_uint64_t)LineStep)
+ "r" ((unsigned long)LineStep)
: "memory", "rdi"
);
}
Modified: branches/theora-thusnelda/lib/internal.h
===================================================================
--- branches/theora-thusnelda/lib/internal.h 2008-12-03 10:34:41 UTC (rev 15555)
+++ branches/theora-thusnelda/lib/internal.h 2008-12-05 05:40:36 UTC (rev 15556)
@@ -33,7 +33,7 @@
# endif
/*This library's version.*/
-# define OC_VENDOR_STRING "Xiph.Org libThusnelda I 20080310"
+# define OC_VENDOR_STRING "Xiph.Org libThusnelda I 20081201"
/*Theora bitstream version.*/
# define TH_VERSION_MAJOR (3)
More information about the commits mailing list