[xiph-commits] r17272 - in branches/theora-gumboot/lib: . x86
gumboot at svn.xiph.org
gumboot at svn.xiph.org
Fri Jun 4 06:43:43 PDT 2010
Author: gumboot
Date: 2010-06-04 06:43:43 -0700 (Fri, 04 Jun 2010)
New Revision: 17272
Modified:
branches/theora-gumboot/lib/decode.c
branches/theora-gumboot/lib/internal.h
branches/theora-gumboot/lib/state.c
branches/theora-gumboot/lib/x86/mmxstate.c
branches/theora-gumboot/lib/x86/x86int.h
branches/theora-gumboot/lib/x86/x86state.c
Log:
Separate the inverse DCT (residual) step from motion compensation (MC) and perform it after MC.
Clean up oc_dec_mv_unpack_and_frag_modes_fill(): track coded fragments with a bitmask instead of the coded[] index list.
Modified: branches/theora-gumboot/lib/decode.c
===================================================================
--- branches/theora-gumboot/lib/decode.c 2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/decode.c 2010-06-04 13:43:43 UTC (rev 17272)
@@ -842,63 +842,57 @@
old_mbi=0;
for (mbi=sby=0;sby<_dec->state.fplanes[0].nsbs;sby+=_dec->state.fplanes[0].nhsbs,mbi+=nhmbs)
for (sbx=0;sbx<_dec->state.fplanes[0].nhsbs;sbx++,mbi+=2){
-// int mask=_dec->state.sb_masks[sby+sbx];
-// int umask=_dec->state.sb_masks[_dec->state.fplanes[1].sboffset+sby_uv+(sbx>>XDECI)]&SOMETHINGCLEVER;
-// int vmask=_dec->state.sb_masks[_dec->state.fplanes[2].sboffset+sby_uv+(sbx>>XDECI)]&SOMETHINGCLEVER;
+#if 0 /* no genius today */
+ ogg_uint16_t lmask [4]= {0x000F,0x00F0,0x0F00,0xF000};
+ ogg_uint16_t cmask444 [4]= {0x000F,0x00F0,0x0F00,0xF000};
+ ogg_uint16_t cmaskRSV[2] [4]= {{0x0003,0x000C,0x3000,0xC000},{0x0090,0x0060,0x0600,0x0900}};
+ ogg_uint16_t cmask422 [2][4]= {{0x0009,0x0030,0x00C0,0x0006},{0x6000,0x0300,0x0C00,0x9000}};
+ ogg_uint16_t cmask420[2][2][4]={{{0x0001,0x0008,0x0004,0x0002},{0x4000,0x2000,0x1000,0x8000}},
+ {{0x0010,0x0020,0x0040,0x0080},{0x0100,0x0200,0x0400,0x0800}}};
+
+// int mask=_dec->state.sb_masks[sby+sbx]&lmask[i];
+// int umask=_dec->state.sb_masks[_dec->state.fplanes[1].sboffset+sby_uv+(sbx>>XDECI)]&cmask[row&YDECI][col&XDECI][i];
+// int vmask=_dec->state.sb_masks[_dec->state.fplanes[2].sboffset+sby_uv+(sbx>>XDECI)]&cmask[row&YDECI][col&XDECI][i];
/* TODO: use the superblock masks directly rather than iterating through
* frags[]. Also use these bitmaps to update frags[].mb_mode in whatever
* order is convenient. This should be faster (if there's enough
* cleverness in the implementation) and it will eliminate use of
* mb_maps[].
*/
-
+#endif
for (i=0;i<4;i++,old_mbi++){
int mb_mode;
mb_mode=rmb_modes[i][mbi];
if(mb_mode!=OC_MODE_INVALID){
oc_mv *mbmv;
ptrdiff_t fragi;
- int coded[13];
- int codedi;
- int ncoded;
+ int lastcoded;
+ int coded;
int mapi;
int mapii;
/*Search for at least one coded fragment.*/
- ncoded=mapii=0;
+ coded=mapii=0;
do{
mapi=map_idxs[mapii];
fragi=mb_maps[old_mbi][mapi>>2][mapi&3];
- if(frags[fragi].coded)coded[ncoded++]=mapi;
+ if(frags[fragi].coded)
+ frags[fragi].mb_mode=mb_mode,coded|=1<<mapii;
}
while(++mapii<map_nidxs);
- if(ncoded<=0)continue;
+ if(coded==0)continue;
mbmv=rmb_mvs[i][mbi];
switch(mb_mode){
case OC_MODE_INTER_MV_FOUR:{
int bi;
- /*Mark the tail of the list, so we don't accidentally go past it.*/
- coded[ncoded]=-1;
- for(bi=codedi=0;bi<4;bi++){
- if(coded[codedi]==bi){
- codedi++;
- fragi=mb_maps[old_mbi][0][bi];
- frags[fragi].mb_mode=mb_mode;
+ for(bi=lastcoded=0;bi<4;bi++){
+ if(coded&1<<bi){
+ lastcoded=bi;
oc_mv_unpack(&_dec->opb,mv_comp_tree,mbmv[bi]);
}
else mbmv[bi][0]=mbmv[bi][1]=0;
}
- if(codedi>0){
- memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
- memcpy(last_mv[0],mbmv[coded[codedi-1]],sizeof(last_mv[0]));
- }
- if(codedi<ncoded){
- for(;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- bi=mapi&3;
- fragi=mb_maps[old_mbi][mapi>>2][bi];
- frags[fragi].mb_mode=mb_mode;
- }
- }
+ memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+ memcpy(last_mv[0],mbmv[lastcoded],sizeof(last_mv[0]));
}break;
case OC_MODE_INTER_MV:
memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
@@ -915,15 +909,6 @@
}break;
default:memset(*mbmv,0,sizeof(*mbmv));break;
}
- /*4MV mode fills in the fragments itself.
- For all other modes we can use this common code.*/
- if(mb_mode!=OC_MODE_INTER_MV_FOUR){
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- fragi=mb_maps[old_mbi][mapi>>2][mapi&3];
- frags[fragi].mb_mode=mb_mode;
- }
- }
}
}
}
@@ -1621,11 +1606,11 @@
counts.*/
static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
oc_dec_pipeline_state *_pipe,int _pli){
- static const char rasterise[16]={
- 0, 1, 3, 2,
- 0, 2, 3, 1,
- 0, 2, 3, 1,
- 3, 2, 0, 1,
+ static const char bitraster[4][16]={
+ {0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15, },
+ {0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, },
+ {0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, },
+ {0, 8, 4, 12, 1, 9, 5, 13, 2, 10, 6, 14, 3, 11, 7, 15, },
};
oc_fragment *frags;
ogg_uint16_t *sb_masks;
@@ -1675,15 +1660,14 @@
fragip = _dec->state.sb_maps[sbi][0];
- for (quadi = 0; quadi < 16; quadi += 4, bmask >>= 4, fragip += 4)
+ for (quadi = 0; quadi < 4; quadi++, bmask >>= 4, fragip += 4)
{
/*This array is made one element larger because the zig-zag index array
uses the final element as a dumping ground for out-of-range indices
to protect us from buffer overflow.*/
OC_ALIGN8(ogg_int16_t dct_coeffs[4][64 + 8]);
int bi;
- int last_zzi[4];
- int mask = 0;
+ int mask;
int mb_mode;
ogg_uint16_t dc_quant;
oc_mv *mb_mvs;
@@ -1693,122 +1677,128 @@
if ((bmask & 15) == 0)
continue;
- mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]];
- dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
- mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]];
- frag_buf_off = _dec->state.frag_buf_offs[fragip[quadi==12?2:0]];
+ mask = bitraster[quadi][bmask&15];
- for (bi = 0; bi < 4; bi++)
- {
- ptrdiff_t fragi;
- int obi;
- if ((bmask & (1 << bi)) == 0) continue;
- fragi = fragip[bi];
- obi = rasterise[quadi + bi];
+ if (_dec->state.frame_type!=OC_INTRA_FRAME){
+ mb_mode = _dec->state.raster_mb_modes[mbi+mbo[quadi]];
+ dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+ mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi]];
+ frag_buf_off = _dec->state.frag_buf_offs[fragip[quadi==3?2:0]];
- last_zzi[obi] = oc_dec_get_dct_coeffs(dct_coeffs[obi], _dec, _pipe, _pli, frags + fragi);
- mask |= 1 << obi;
- }
+ switch (pixel_fmt){
+ case TH_PF_444:
+ if (mb_mode==OC_MODE_INTER_MV_FOUR)
+ oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask,mb_mvs);
+ else
+ oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+ break;
- switch (pixel_fmt){
- case TH_PF_444:
- if (mb_mode==OC_MODE_INTER_MV_FOUR)
- oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask,mb_mvs);
- else
- oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
- break;
-
- case TH_PF_422:
- /* TODO: code the reference frame index and the motion vector into a
- * single word and then compare left and right copies -- if they're the
- * same then do things quickly instead of like this:
- */
- if (mask&5){
- if (mb_mode==OC_MODE_INTER_MV_FOUR){
- cmv[0][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0],1,1);
- cmv[0][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1],1,1);
- cmv[2][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][0]+mb_mvs[3][0],1,1);
- cmv[2][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][1]+mb_mvs[3][1],1,1);
- oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&5,cmv);
+ case TH_PF_422:
+ /* TODO: code the reference frame index and the motion vector into a
+ * single word and then compare left and right copies -- if they're the
+ * same then do things quickly instead of like this:
+ */
+ if (mask&5){
+ if (mb_mode==OC_MODE_INTER_MV_FOUR){
+ cmv[0][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0],1,1);
+ cmv[0][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1],1,1);
+ cmv[2][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][0]+mb_mvs[3][0],1,1);
+ cmv[2][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][1]+mb_mvs[3][1],1,1);
+ oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&5,cmv);
+ }
+ else
+ oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&5,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
}
- else
- oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&5,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
- }
- mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+1];
- dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
- mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+1];
+ mb_mode = _dec->state.raster_mb_modes[mbi+mbo[quadi]+1];
+ dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+ mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi]+1];
- if (mask&10){
- if (mb_mode==OC_MODE_INTER_MV_FOUR){
- cmv[1][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0],1,1);
- cmv[1][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1],1,1);
- cmv[3][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][0]+mb_mvs[3][0],1,1);
- cmv[3][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][1]+mb_mvs[3][1],1,1);
- oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&10,cmv);
+ if (mask&10){
+ if (mb_mode==OC_MODE_INTER_MV_FOUR){
+ cmv[1][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0],1,1);
+ cmv[1][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1],1,1);
+ cmv[3][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][0]+mb_mvs[3][0],1,1);
+ cmv[3][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][1]+mb_mvs[3][1],1,1);
+ oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&10,cmv);
+ }
+ else
+ oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&10,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
}
- else
- oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&10,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
- }
- break;
+ break;
- case TH_PF_420:
- /* TODO: code the reference frame index and the motion vector into a
- * single word and then compare left and right copies -- if they're the
- * same then do things quickly instead of like this:
- */
- if (mask&1)
- if (mb_mode==OC_MODE_INTER_MV_FOUR){
- cmv[0][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
- cmv[0][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
- oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&1,cmv);
- }
- else
- oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&1,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+ case TH_PF_420:
+ /* TODO: code the reference frame index and the motion vector into a
+ * single word and then compare left and right copies -- if they're the
+ * same then do things quickly instead of like this:
+ */
+ if (mask&1)
+ if (mb_mode==OC_MODE_INTER_MV_FOUR){
+ cmv[0][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+ cmv[0][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+ oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&1,cmv);
+ }
+ else
+ oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&1,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
- mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+1];
- dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
- mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+1];
+ mb_mode = _dec->state.raster_mb_modes[mbi+mbo[quadi]+1];
+ dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+ mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi]+1];
- if (mask&2)
- if (mb_mode==OC_MODE_INTER_MV_FOUR){
- cmv[1][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
- cmv[1][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
- oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&2,cmv);
- }
- else
- oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&2,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+ if (mask&2)
+ if (mb_mode==OC_MODE_INTER_MV_FOUR){
+ cmv[1][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+ cmv[1][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+ oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&2,cmv);
+ }
+ else
+ oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&2,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
- mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+nhmbs];
- dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
- mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+nhmbs];
+ mb_mode = _dec->state.raster_mb_modes[mbi+mbo[quadi]+nhmbs];
+ dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+ mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi]+nhmbs];
- /* TODO: code the reference frame index and the motion vector into a
- * single word and then compare left and right copies -- if they're the
- * same then do things quickly instead of like this:
- */
- if (mask&4)
- if (mb_mode==OC_MODE_INTER_MV_FOUR){
- cmv[2][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
- cmv[2][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
- oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&4,cmv);
- }
- else
- oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&4,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+ /* TODO: code the reference frame index and the motion vector into a
+ * single word and then compare left and right copies -- if they're the
+ * same then do things quickly instead of like this:
+ */
+ if (mask&4)
+ if (mb_mode==OC_MODE_INTER_MV_FOUR){
+ cmv[2][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+ cmv[2][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+ oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&4,cmv);
+ }
+ else
+ oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&4,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
- mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+nhmbs+1];
+ mb_mode = _dec->state.raster_mb_modes[mbi+mbo[quadi]+nhmbs+1];
+ dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+ mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi]+nhmbs+1];
+
+ if (mask&8)
+ if (mb_mode==OC_MODE_INTER_MV_FOUR){
+ cmv[3][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+ cmv[3][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+ oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&8,cmv);
+ }
+ else
+ oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&8,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+ break;
+ }
+ }
+
+ for (bi = 0; bi < 4; bi++)
+ {
+ ptrdiff_t fragi;
+ int last_zzi;
+ if ((bmask & (1 << bi)) == 0) continue;
+ fragi = fragip[bi];
+ mb_mode=frags[fragi].mb_mode;
dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
- mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+nhmbs+1];
+ frag_buf_off = _dec->state.frag_buf_offs[fragi];
- if (mask&8)
- if (mb_mode==OC_MODE_INTER_MV_FOUR){
- cmv[3][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
- cmv[3][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
- oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&8,cmv);
- }
- else
- oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&8,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
- break;
+ last_zzi = oc_dec_get_dct_coeffs(dct_coeffs[bi], _dec, _pipe, _pli, frags + fragi);
+ oc_state_frag_residual(&_dec->state,frag_buf_off,_pli,dct_coeffs[bi],last_zzi,dc_quant,mb_mode);
}
}
}
Modified: branches/theora-gumboot/lib/internal.h
===================================================================
--- branches/theora-gumboot/lib/internal.h 2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/internal.h 2010-06-04 13:43:43 UTC (rev 17272)
@@ -285,12 +285,12 @@
void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi);
void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
- void (*state_quad_recon)(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,int _ref_frame,oc_mv _mv);
- void (*state_4mv_recon)(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,oc_mv _mv[4]);
+ void (*state_quad_predict)(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,int _ref_frame,oc_mv _mv);
+ void (*state_4mv_predict)(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,oc_mv _mv[4]);
+ void (*state_frag_residual)(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant,int _mb_mode);
void (*state_frag_copy_list)(const oc_theora_state *_state,
const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
int _dst_frame,int _src_frame,int _pli);
@@ -473,12 +473,12 @@
void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi);
void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_state_quad_recon(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,int _ref_frame,oc_mv _mv);
-void oc_state_4mv_recon(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,oc_mv _mv[4]);
+void oc_state_quad_predict(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,int _ref_frame,oc_mv _mv);
+void oc_state_4mv_predict(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,oc_mv _mv[4]);
+void oc_state_frag_residual(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant,int _mb_mode);
void oc_state_frag_copy_list(const oc_theora_state *_state,
const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
int _dst_frame,int _src_frame,int _pli);
Modified: branches/theora-gumboot/lib/state.c
===================================================================
--- branches/theora-gumboot/lib/state.c 2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/state.c 2010-06-04 13:43:43 UTC (rev 17272)
@@ -608,8 +608,9 @@
_state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
_state->opt_vtable.idct8x8=oc_idct8x8_c;
_state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
- //_state->opt_vtable.state_frag_recon=oc_state_quad_recon_c;
- //_state->opt_vtable.state_frag_recon=oc_state_4mv_recon_c;
+ //_state->opt_vtable.state_quad_predict=oc_state_quad_predict_c;
+ //_state->opt_vtable.state_4mv_predict=oc_state_4mv_predict_c;
+ //_state->opt_vtable.state_frag_residual=oc_state_frag_residual_c;
_state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c;
_state->opt_vtable.state_loop_filter_frag_rows=
oc_state_loop_filter_frag_rows_c;
@@ -885,20 +886,22 @@
_last_zzi,_dc_quant);
}
-void oc_state_quad_recon(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,int _ref_frame,oc_mv _mv){
- _state->opt_vtable.state_quad_recon(_state,_frag_buf_off,_pli,_dct_coeffs,
- _last_zzi,_dc_quant,_mask,_ref_frame,_mv);
+void oc_state_quad_predict(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,int _ref_frame,oc_mv _mv){
+ _state->opt_vtable.state_quad_predict(_state,_frag_buf_off,_pli,_mask,_ref_frame,_mv);
}
-void oc_state_4mv_recon(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,oc_mv _mvs[4]){
- _state->opt_vtable.state_4mv_recon(_state,_frag_buf_off,_pli,_dct_coeffs,
- _last_zzi,_dc_quant,_mask,_mvs);
+void oc_state_4mv_predict(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,oc_mv _mvs[4]){
+ _state->opt_vtable.state_4mv_predict(_state,_frag_buf_off,_pli,_mask,_mvs);
}
+void oc_state_frag_residual(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant,int _mb_mode){
+ _state->opt_vtable.state_frag_residual(_state,_frag_buf_off,_pli,_dct_coeffs,
+ _last_zzi,_dc_quant,_mb_mode);
+}
+
void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
unsigned char *dst;
Modified: branches/theora-gumboot/lib/x86/mmxstate.c
===================================================================
--- branches/theora-gumboot/lib/x86/mmxstate.c 2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/x86/mmxstate.c 2010-06-04 13:43:43 UTC (rev 17272)
@@ -145,154 +145,145 @@
}
}
-void oc_state_quad_recon_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,int _ref_frame, oc_mv _mv){
+static const ogg_int16_t zeroes[64]={0};
+
+void oc_state_quad_predict_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,int _ref_frame, oc_mv _mv){
unsigned char *dst;
int ystride;
int nhfrags;
- int i;
ystride=_state->ref_ystride[_pli];
nhfrags=_state->fplanes[_pli].nhfrags;
dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+_frag_buf_off;
- for (i=0;i<4;i++){
- if ((_mask & 1 << i) == 0)
- continue;
- /*Apply the inverse transform.*/
- /*Special case only having a DC component.*/
- if(_last_zzi[i]<2){
- /*Note that this value must be unsigned, to keep the __asm__ block from
- sign-extending it when it puts it in a register.*/
- ogg_uint16_t p;
- /*We round this dequant product (and not any of the others) because there's
- no iDCT rounding.*/
- p=(ogg_int16_t)(_dct_coeffs[i][0]*(ogg_int32_t)_dc_quant+15>>5);
- /*Fill _dct_coeffs[i] with p.*/
- __asm__ __volatile__(
- /*mm0=0000 0000 0000 AAAA*/
- "movd %[p],%%mm0\n\t"
- /*mm0=0000 0000 AAAA AAAA*/
- "punpcklwd %%mm0,%%mm0\n\t"
- /*mm0=AAAA AAAA AAAA AAAA*/
- "punpckldq %%mm0,%%mm0\n\t"
- "movq %%mm0,(%[y])\n\t"
- "movq %%mm0,8(%[y])\n\t"
- "movq %%mm0,16(%[y])\n\t"
- "movq %%mm0,24(%[y])\n\t"
- "movq %%mm0,32(%[y])\n\t"
- "movq %%mm0,40(%[y])\n\t"
- "movq %%mm0,48(%[y])\n\t"
- "movq %%mm0,56(%[y])\n\t"
- "movq %%mm0,64(%[y])\n\t"
- "movq %%mm0,72(%[y])\n\t"
- "movq %%mm0,80(%[y])\n\t"
- "movq %%mm0,88(%[y])\n\t"
- "movq %%mm0,96(%[y])\n\t"
- "movq %%mm0,104(%[y])\n\t"
- "movq %%mm0,112(%[y])\n\t"
- "movq %%mm0,120(%[y])\n\t"
- :
- :[y]"r"(_dct_coeffs[i]),[p]"r"((unsigned)p)
- :"memory"
- );
- }
- else{
- /*Dequantize the DC coefficient.*/
- _dct_coeffs[i][0]=(ogg_int16_t)(_dct_coeffs[i][0]*(int)_dc_quant);
- oc_idct8x8(_state,_dct_coeffs[i],_last_zzi[i]);
- }
- }
-
/*Fill in the target buffer.*/
- if(_ref_frame==OC_FRAME_SELF) {
- if (_mask & 1) oc_frag_recon_intra_mmx(dst+0,ystride,_dct_coeffs[0]);
- if (_mask & 2) oc_frag_recon_intra_mmx(dst+8,ystride,_dct_coeffs[1]);
- dst += 8 * ystride;
- if (_mask & 4) oc_frag_recon_intra_mmx(dst+0,ystride,_dct_coeffs[2]);
- if (_mask & 8) oc_frag_recon_intra_mmx(dst+8,ystride,_dct_coeffs[3]);
- }
- else{
+ if(_ref_frame!=OC_FRAME_SELF){
const unsigned char *ref;
int mvoffsets[2];
ref=
_state->ref_frame_data[_state->ref_frame_idx[_ref_frame]]
+_frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mv[0],_mv[1])>1){
- if ((_mask&3)==3){
+ switch(_mask&3){
+ case 3:
oc_int_fragx2_copy2_sse2(dst,ystride,ref+mvoffsets[0],ref+mvoffsets[1],ystride);
- if (_dct_coeffs[0][0]||_last_zzi[0]>1)
- oc_frag_recon_inter_mmx(dst+0,dst+0,ystride,_dct_coeffs[0]);
- if (_dct_coeffs[1][0]||_last_zzi[1]>1)
- oc_frag_recon_inter_mmx(dst+8,dst+8,ystride,_dct_coeffs[1]);
+ break;
+ case 1:
+ oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
+ ystride,zeroes);
+ break;
+ case 2:
+ oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
+ ystride,zeroes);
}
- else{
- if (_mask & 1)
- oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
- ystride,_dct_coeffs[0]);
- if (_mask & 2)
- oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
- ystride,_dct_coeffs[1]);
- }
dst+=ystride*8;
ref+=ystride*8;
- if ((_mask&12)==12){
+ switch(_mask>>2){
+ case 3:
oc_int_fragx2_copy2_sse2(dst,ystride,ref+mvoffsets[0],ref+mvoffsets[1],ystride);
- if (_dct_coeffs[2][0]||_last_zzi[2]>1)
- oc_frag_recon_inter_mmx(dst+0,dst+0,ystride,_dct_coeffs[2]);
- if (_dct_coeffs[3][0]||_last_zzi[3]>1)
- oc_frag_recon_inter_mmx(dst+8,dst+8,ystride,_dct_coeffs[3]);
+ break;
+ case 1:
+ oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
+ ystride,zeroes);
+ break;
+ case 2:
+ oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
+ ystride,zeroes);
}
- else{
- if (_mask & 4)
- oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
- ystride,_dct_coeffs[2]);
- if (_mask & 8)
- oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
- ystride,_dct_coeffs[3]);
- }
}
else{
- if (_mask & 1)
- oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[0]);
- if (_mask & 2)
- oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[1]);
+ switch(_mask&3){
+ case 3:
+ oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+ oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+ break;
+ case 1:
+ oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+ break;
+ case 2:
+ oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+ break;
+ }
dst+=ystride*8;
ref+=ystride*8;
- if (_mask & 4)
- oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[2]);
- if (_mask & 8)
- oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[3]);
+ switch(_mask>>2){
+ case 3:
+ oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+ oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+ break;
+ case 1:
+ oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+ break;
+ case 2:
+ oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+ break;
+ };
}
}
}
-void oc_state_4mv_recon_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask, oc_mv _mvs[4]){
+void oc_state_4mv_predict_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask, oc_mv _mvs[4]){
unsigned char *dst;
int ystride;
const unsigned char *ref;
int mvoffsets[2];
- int i;
ystride=_state->ref_ystride[_pli];
dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+_frag_buf_off;
- for (i=0;i<4;i++){
- if ((_mask & 1 << i) == 0)
- continue;
- /*Apply the inverse transform.*/
- /*Special case only having a DC component.*/
- if(_last_zzi[i]<2){
- /*Note that this value must be unsigned, to keep the __asm__ block from
- sign-extending it when it puts it in a register.*/
+ /*Fill in the target buffer.*/
+ ref=
+ _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_PREV]]
+ +_frag_buf_off;
+ if (_mask & 1){
+ if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[0][0],_mvs[0][1])>1){
+ oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
+ ystride,zeroes);
+ }
+ else oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+ }
+ if (_mask & 2){
+ if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[1][0],_mvs[1][1])>1){
+ oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
+ ystride,zeroes);
+ }
+ else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+ }
+ dst+=ystride*8;
+ ref+=ystride*8;
+ if (_mask & 4){
+ if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[2][0],_mvs[2][1])>1){
+ oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
+ ystride,zeroes);
+ }
+ else oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+ }
+ if (_mask & 8){
+ if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[3][0],_mvs[3][1])>1){
+ oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
+ ystride,zeroes);
+ }
+ else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+ }
+}
+
+void oc_state_frag_residual_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant,int _mb_mode){
+ unsigned char *dst;
+ int ystride;
+ /*Apply the inverse transform.*/
+ /*Special case only having a DC component.*/
+ if(_last_zzi<2){
+ /*Note that this value must be unsigned, to keep the __asm__ block from
+ sign-extending it when it puts it in a register.*/
+ if (_dct_coeffs[0]){
ogg_uint16_t p;
/*We round this dequant product (and not any of the others) because there's
no iDCT rounding.*/
- p=(ogg_int16_t)(_dct_coeffs[i][0]*(ogg_int32_t)_dc_quant+15>>5);
- /*Fill _dct_coeffs[i] with p.*/
+ p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
+ /*Fill _dct_coeffs with p.*/
__asm__ __volatile__(
/*mm0=0000 0000 0000 AAAA*/
"movd %[p],%%mm0\n\t"
@@ -317,51 +308,21 @@
"movq %%mm0,112(%[y])\n\t"
"movq %%mm0,120(%[y])\n\t"
:
- :[y]"r"(_dct_coeffs[i]),[p]"r"((unsigned)p)
+ :[y]"r"(_dct_coeffs),[p]"r"((unsigned)p)
:"memory"
);
}
- else{
- /*Dequantize the DC coefficient.*/
- _dct_coeffs[i][0]=(ogg_int16_t)(_dct_coeffs[i][0]*(int)_dc_quant);
- oc_idct8x8(_state,_dct_coeffs[i],_last_zzi[i]);
- }
}
-
- /*Fill in the target buffer.*/
- ref=
- _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_PREV]]
- +_frag_buf_off;
- if (_mask & 1){
- if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[0][0],_mvs[0][1])>1){
- oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
- ystride,_dct_coeffs[0]);
- }
- else oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[0]);
+ else{
+ /*Dequantize the DC coefficient.*/
+ _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
+ oc_idct8x8(_state,_dct_coeffs,_last_zzi);
}
- if (_mask & 2){
- if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[1][0],_mvs[1][1])>1){
- oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
- ystride,_dct_coeffs[1]);
- }
- else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[1]);
- }
- dst+=ystride*8;
- ref+=ystride*8;
- if (_mask & 4){
- if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[2][0],_mvs[2][1])>1){
- oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
- ystride,_dct_coeffs[2]);
- }
- else oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[2]);
- }
- if (_mask & 8){
- if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[3][0],_mvs[3][1])>1){
- oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
- ystride,_dct_coeffs[3]);
- }
- else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[3]);
- }
+ /*Fill in the target buffer.*/
+ ystride=_state->ref_ystride[_pli];
+ dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+_frag_buf_off;
+ if(_mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs);
+ else if(_dct_coeffs[0]||_last_zzi>1)oc_frag_recon_inter_mmx(dst,dst,ystride,_dct_coeffs);
}
/*We copy these entire function to inline the actual MMX routines so that we
Modified: branches/theora-gumboot/lib/x86/x86int.h
===================================================================
--- branches/theora-gumboot/lib/x86/x86int.h 2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/x86/x86int.h 2010-06-04 13:43:43 UTC (rev 17272)
@@ -62,12 +62,12 @@
void oc_idct8x8_sse2(ogg_int16_t _y[64],int _last_zzi);
void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_state_quad_recon_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,int _ref_frame,oc_mv _mv);
-void oc_state_4mv_recon_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,oc_mv _mvs[4]);
+void oc_state_quad_predict_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,int _ref_frame,oc_mv _mv);
+void oc_state_4mv_predict_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,oc_mv _mvs[4]);
+void oc_state_frag_residual_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant,int _mb_mode);
void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
int _dst_frame,int _src_frame,int _pli);
Modified: branches/theora-gumboot/lib/x86/x86state.c
===================================================================
--- branches/theora-gumboot/lib/x86/x86state.c 2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/x86/x86state.c 2010-06-04 13:43:43 UTC (rev 17272)
@@ -72,8 +72,9 @@
_state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx;
_state->opt_vtable.idct8x8=oc_idct8x8_mmx;
_state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx;
- _state->opt_vtable.state_quad_recon=oc_state_quad_recon_mmx;
- _state->opt_vtable.state_4mv_recon=oc_state_4mv_recon_mmx;
+ _state->opt_vtable.state_quad_predict=oc_state_quad_predict_mmx;
+ _state->opt_vtable.state_4mv_predict=oc_state_4mv_predict_mmx;
+ _state->opt_vtable.state_frag_residual=oc_state_frag_residual_mmx;
_state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx;
_state->opt_vtable.state_loop_filter_frag_rows=
oc_state_loop_filter_frag_rows_mmx;
More information about the commits
mailing list