[xiph-commits] r17563 - in trunk/theora/lib: . arm x86 x86_vc
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Mon Oct 25 10:40:54 PDT 2010
Author: tterribe
Date: 2010-10-25 10:40:54 -0700 (Mon, 25 Oct 2010)
New Revision: 17563
Modified:
trunk/theora/lib/analyze.c
trunk/theora/lib/arm/armstate.c
trunk/theora/lib/decode.c
trunk/theora/lib/encode.c
trunk/theora/lib/mcenc.c
trunk/theora/lib/state.c
trunk/theora/lib/state.h
trunk/theora/lib/x86/mmxstate.c
trunk/theora/lib/x86_vc/mmxstate.c
Log:
Ensure frame rows are 16-byte aligned.
We don't actually use this for anything yet, but it may help calling
applications (e.g., doing software YUV2RGB conversion).
Also, change ref_frame_data to point directly to the desired reference frame,
rather than require a lookup through ref_frame_idx first.
This saves an indirection and gives a 0.7% speed-up at 720p on a Cortex A8.
It should have an even bigger benefit on C64x, though it wasn't benchmarked
there.
Modified: trunk/theora/lib/analyze.c
===================================================================
--- trunk/theora/lib/analyze.c 2010-10-24 04:50:46 UTC (rev 17562)
+++ trunk/theora/lib/analyze.c 2010-10-25 17:40:54 UTC (rev 17563)
@@ -610,14 +610,13 @@
static void oc_enc_pipeline_finish_mcu_plane(oc_enc_ctx *_enc,
oc_enc_pipeline_state *_pipe,int _pli,int _sdelay,int _edelay){
- int refi;
/*Copy over all the uncoded fragments from this plane and advance the uncoded
fragment list.*/
if(_pipe->nuncoded_fragis[_pli]>0){
_pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
oc_frag_copy_list(&_enc->state,
- _enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_SELF]],
- _enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]],
+ _enc->state.ref_frame_data[OC_FRAME_SELF],
+ _enc->state.ref_frame_data[OC_FRAME_PREV],
_enc->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
_pipe->nuncoded_fragis[_pli],_enc->state.frag_buf_offs);
_pipe->nuncoded_fragis[_pli]=0;
@@ -636,17 +635,18 @@
_pipe->coded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
_pipe->ncoded_fragis[_pli]=0;
/*Apply the loop filter if necessary.*/
- refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
if(_pipe->loop_filter){
- oc_state_loop_filter_frag_rows(&_enc->state,_pipe->bounding_values,
- refi,_pli,_pipe->fragy0[_pli]-_sdelay,_pipe->fragy_end[_pli]-_edelay);
+ oc_state_loop_filter_frag_rows(&_enc->state,
+ _pipe->bounding_values,OC_FRAME_SELF,_pli,
+ _pipe->fragy0[_pli]-_sdelay,_pipe->fragy_end[_pli]-_edelay);
}
else _sdelay=_edelay=0;
/*To fill borders, we have an additional two pixel delay, since a fragment
in the next row could filter its top edge, using two pixels from a
fragment in this row.
But there's no reason to delay a full fragment between the two.*/
- oc_state_borders_fill_rows(&_enc->state,refi,_pli,
+ oc_state_borders_fill_rows(&_enc->state,
+ _enc->state.ref_frame_idx[OC_FRAME_SELF],_pli,
(_pipe->fragy0[_pli]-_sdelay<<3)-(_sdelay<<1),
(_pipe->fragy_end[_pli]-_edelay<<3)-(_edelay<<1));
}
@@ -696,8 +696,7 @@
frags=_enc->state.frags;
frag_offs=_enc->state.frag_buf_offs[_fragi];
ystride=_enc->state.ref_ystride[_pli];
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]]
- +frag_offs;
+ src=_enc->state.ref_frame_data[OC_FRAME_IO]+frag_offs;
borderi=frags[_fragi].borderi;
qii=frags[_fragi].qii;
data=_enc->pipe.dct_data;
@@ -718,9 +717,8 @@
}
refi=frags[_fragi].refi;
mb_mode=frags[_fragi].mb_mode;
- ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[refi]]+frag_offs;
- dst=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_SELF]]
- +frag_offs;
+ ref=_enc->state.ref_frame_data[refi]+frag_offs;
+ dst=_enc->state.ref_frame_data[OC_FRAME_SELF]+frag_offs;
/*Motion compensation:*/
switch(mb_mode){
case OC_MODE_INTRA:{
@@ -1146,7 +1144,7 @@
int bi;
frag_buf_offs=_enc->state.frag_buf_offs;
sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
ystride=_enc->state.ref_ystride[0];
luma=0;
for(bi=0;bi<4;bi++){
@@ -1363,7 +1361,7 @@
unsigned dc;
frag_buf_offs=_enc->state.frag_buf_offs;
sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
ystride=_enc->state.ref_ystride[0];
for(bi=0;bi<4;bi++){
fragi=sb_map[bi];
@@ -1412,7 +1410,7 @@
int bi;
frag_buf_offs=_enc->state.frag_buf_offs;
sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
ystride=_enc->state.ref_ystride[0];
fragi=sb_maps[_mbi>>2][_mbi&3][0];
frag_offs=frag_buf_offs[fragi];
@@ -1501,7 +1499,7 @@
int lambda;
int ystride;
int nqis;
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
ystride=_enc->state.ref_ystride[_pli];
frag_offs=_enc->state.frag_buf_offs[_fragi];
satd=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
@@ -1956,8 +1954,8 @@
ptrdiff_t fragi;
ptrdiff_t frag_offs;
int borderi;
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
- ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
+ ref=_enc->state.ref_frame_data[OC_FRAME_PREV];
ystride=_enc->state.ref_ystride[0];
frags=_enc->state.frags;
frag_buf_offs=_enc->state.frag_buf_offs;
@@ -2051,9 +2049,8 @@
ptrdiff_t fragi;
ptrdiff_t frag_offs;
unsigned dc;
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
- ref=_enc->state.ref_frame_data[
- _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(_mb_mode)]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
+ ref=_enc->state.ref_frame_data[OC_FRAME_FOR_MODE(_mb_mode)];
ystride=_enc->state.ref_ystride[0];
frag_buf_offs=_enc->state.frag_buf_offs;
sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
@@ -2163,8 +2160,8 @@
int bits1;
unsigned satd;
unsigned dc;
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
- ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
+ ref=_enc->state.ref_frame_data[OC_FRAME_PREV];
ystride=_enc->state.ref_ystride[0];
frag_buf_offs=_enc->state.frag_buf_offs;
frag_mvs=_enc->state.frag_mvs;
Modified: trunk/theora/lib/arm/armstate.c
===================================================================
--- trunk/theora/lib/arm/armstate.c 2010-10-24 04:50:46 UTC (rev 17562)
+++ trunk/theora/lib/arm/armstate.c 2010-10-25 17:40:54 UTC (rev 17563)
@@ -119,12 +119,12 @@
frag_buf_off=_state->frag_buf_offs[_fragi];
refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
- dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+ dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
if(refi==OC_FRAME_SELF)oc_frag_recon_intra_arm(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
+ ref=_state->ref_frame_data[refi]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_arm(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
@@ -159,12 +159,12 @@
frag_buf_off=_state->frag_buf_offs[_fragi];
refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
- dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+ dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
if(refi==OC_FRAME_SELF)oc_frag_recon_intra_v6(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
+ ref=_state->ref_frame_data[refi]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_v6(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
@@ -199,12 +199,12 @@
frag_buf_off=_state->frag_buf_offs[_fragi];
refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
- dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+ dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
if(refi==OC_FRAME_SELF)oc_frag_recon_intra_neon(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
+ ref=_state->ref_frame_data[refi]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_neon(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
Modified: trunk/theora/lib/decode.c
===================================================================
--- trunk/theora/lib/decode.c 2010-10-24 04:50:46 UTC (rev 17562)
+++ trunk/theora/lib/decode.c 2010-10-25 17:40:54 UTC (rev 17563)
@@ -1597,8 +1597,8 @@
if(_pipe->nuncoded_fragis[_pli]>0){
_pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
oc_frag_copy_list(&_dec->state,
- _dec->state.ref_frame_data[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
- _dec->state.ref_frame_data[_dec->state.ref_frame_idx[OC_FRAME_PREV]],
+ _dec->state.ref_frame_data[OC_FRAME_SELF],
+ _dec->state.ref_frame_data[OC_FRAME_PREV],
_dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
_pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
}
@@ -2053,26 +2053,33 @@
buffers (i.e., decoding did not start on a key frame).
We initialize them to a solid gray here.*/
static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
- th_info *info;
- size_t yplane_sz;
- size_t cplane_sz;
- int yhstride;
- int yheight;
- int chstride;
- int cheight;
+ th_info *info;
+ size_t yplane_sz;
+ size_t cplane_sz;
+ ptrdiff_t yoffset;
+ int yhstride;
+ int yheight;
+ int chstride;
+ int cheight;
_dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
_dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
_dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
+ _dec->state.ref_frame_data[OC_FRAME_GOLD]=
+ _dec->state.ref_frame_data[OC_FRAME_PREV]=
+ _dec->state.ref_frame_data[OC_FRAME_SELF]=
+ _dec->state.ref_frame_bufs[0][0].data;
memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
sizeof(_dec->pp_frame_buf[0])*3);
info=&_dec->state.info;
- yhstride=info->frame_width+2*OC_UMV_PADDING;
+ yhstride=abs(_dec->state.ref_ystride[0]);
yheight=info->frame_height+2*OC_UMV_PADDING;
- chstride=yhstride>>!(info->pixel_fmt&1);
+ chstride=abs(_dec->state.ref_ystride[1]);
cheight=yheight>>!(info->pixel_fmt&2);
- yplane_sz=yhstride*(size_t)yheight;
+ yplane_sz=yhstride*(size_t)yheight+16;
cplane_sz=chstride*(size_t)cheight;
- memset(_dec->state.ref_frame_data[0],0x80,yplane_sz+2*cplane_sz);
+ yoffset=_dec->state.ref_ystride[0]*(yheight-1)-
+ (OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride);
+ memset(_dec->state.ref_frame_data[0]-yoffset,0x80,yplane_sz+2*cplane_sz);
}
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
@@ -2119,6 +2126,8 @@
for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
_dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+ _dec->state.ref_frame_data[OC_FRAME_SELF]=
+ _dec->state.ref_frame_bufs[refi][0].data;
#if defined(HAVE_CAIRO)
_dec->telemetry_frame_bytes=_op->bytes;
#endif
@@ -2207,7 +2216,7 @@
sdelay+=notstart;
edelay+=notdone;
oc_state_loop_filter_frag_rows(&_dec->state,
- _dec->pipe.bounding_values,refi,pli,
+ _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
_dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
}
/*To fill the borders, we have an additional two pixel delay, since a
@@ -2272,11 +2281,16 @@
_dec->state.ref_frame_idx[OC_FRAME_GOLD]=
_dec->state.ref_frame_idx[OC_FRAME_PREV]=
_dec->state.ref_frame_idx[OC_FRAME_SELF];
+ _dec->state.ref_frame_data[OC_FRAME_GOLD]=
+ _dec->state.ref_frame_data[OC_FRAME_PREV]=
+ _dec->state.ref_frame_data[OC_FRAME_SELF];
}
else{
/*Otherwise, just replace the previous reference frame.*/
_dec->state.ref_frame_idx[OC_FRAME_PREV]=
_dec->state.ref_frame_idx[OC_FRAME_SELF];
+ _dec->state.ref_frame_data[OC_FRAME_PREV]=
+ _dec->state.ref_frame_data[OC_FRAME_SELF];
}
/*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
gamma values, if nothing else).*/
Modified: trunk/theora/lib/encode.c
===================================================================
--- trunk/theora/lib/encode.c 2010-10-24 04:50:46 UTC (rev 17562)
+++ trunk/theora/lib/encode.c 2010-10-25 17:40:54 UTC (rev 17563)
@@ -1255,6 +1255,8 @@
/*Use the previous frame's reconstruction.*/
_enc->state.ref_frame_idx[OC_FRAME_SELF]=
_enc->state.ref_frame_idx[OC_FRAME_PREV];
+ _enc->state.ref_frame_data[OC_FRAME_SELF]=
+ _enc->state.ref_frame_data[OC_FRAME_PREV];
/*Flag motion vector analysis about the frame drop.*/
_enc->prevframe_dropped=1;
/*Zero the packet.*/
@@ -1690,27 +1692,37 @@
if(_enc->state.ref_frame_idx[OC_FRAME_SELF]>=0){
_enc->state.ref_frame_idx[OC_FRAME_PREV]=
_enc->state.ref_frame_idx[OC_FRAME_SELF];
+ _enc->state.ref_frame_data[OC_FRAME_PREV]=
+ _enc->state.ref_frame_data[OC_FRAME_SELF];
if(_enc->state.frame_type==OC_INTRA_FRAME){
/*The new frame becomes both the previous and gold reference frames.*/
_enc->state.keyframe_num=_enc->state.curframe_num;
_enc->state.ref_frame_idx[OC_FRAME_GOLD]=
_enc->state.ref_frame_idx[OC_FRAME_SELF];
+ _enc->state.ref_frame_data[OC_FRAME_GOLD]=
+ _enc->state.ref_frame_data[OC_FRAME_SELF];
}
}
if(_enc->state.ref_frame_idx[OC_FRAME_IO]>=0&&_enc->prevframe_dropped==0){
_enc->state.ref_frame_idx[OC_FRAME_PREV_ORIG]=
_enc->state.ref_frame_idx[OC_FRAME_IO];
+ _enc->state.ref_frame_data[OC_FRAME_PREV_ORIG]=
+ _enc->state.ref_frame_data[OC_FRAME_IO];
if(_enc->state.frame_type==OC_INTRA_FRAME){
/*The new input frame becomes both the previous and gold
original-reference frames.*/
_enc->state.ref_frame_idx[OC_FRAME_GOLD_ORIG]=
_enc->state.ref_frame_idx[OC_FRAME_IO];
+ _enc->state.ref_frame_data[OC_FRAME_GOLD_ORIG]=
+ _enc->state.ref_frame_data[OC_FRAME_IO];
}
}
/*Select a free buffer to use for the incoming frame*/
for(refi=3;refi==_enc->state.ref_frame_idx[OC_FRAME_GOLD_ORIG]||
refi==_enc->state.ref_frame_idx[OC_FRAME_PREV_ORIG];refi++);
_enc->state.ref_frame_idx[OC_FRAME_IO]=refi;
+ _enc->state.ref_frame_data[OC_FRAME_IO]=
+ _enc->state.ref_frame_bufs[refi][0].data;
/*Step 3: Copy the input to our internal buffer.
This lets us add padding, so we don't have to worry about dereferencing
possibly invalid addresses, and allows us to use the same strides and
@@ -1729,6 +1741,8 @@
for(refi=0;refi==_enc->state.ref_frame_idx[OC_FRAME_GOLD]||
refi==_enc->state.ref_frame_idx[OC_FRAME_PREV];refi++);
_enc->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+ _enc->state.ref_frame_data[OC_FRAME_SELF]=
+ _enc->state.ref_frame_bufs[refi][0].data;
_enc->state.curframe_num+=_enc->prev_dup_count+1;
/*Step 4: Compress the frame.*/
/*Start with a keyframe, and don't allow the generation of invalid files that
Modified: trunk/theora/lib/mcenc.c
===================================================================
--- trunk/theora/lib/mcenc.c 2010-10-24 04:50:46 UTC (rev 17562)
+++ trunk/theora/lib/mcenc.c 2010-10-25 17:40:54 UTC (rev 17563)
@@ -308,9 +308,9 @@
hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15;
frag_buf_offs=_enc->state.frag_buf_offs;
fragis=_enc->state.mb_maps[_mbi][0];
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
- ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame_full]];
- satd_ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
+ ref=_enc->state.ref_frame_data[_frame_full];
+ satd_ref=_enc->state.ref_frame_data[_frame];
ystride=_enc->state.ref_ystride[0];
/*TODO: customize error function for speed/(quality+size) tradeoff.*/
best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
@@ -557,8 +557,8 @@
int best_site;
int sitei;
int err;
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
- ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_framei]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
+ ref=_enc->state.ref_frame_data[_framei];
frag_buf_offs=_enc->state.frag_buf_offs;
fragis=_enc->state.mb_maps[_mbi][0];
ystride=_enc->state.ref_ystride[0];
@@ -612,8 +612,8 @@
int best_site;
int sitei;
int err;
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
- ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
+ ref=_enc->state.ref_frame_data[_frame];
frag_buf_offs=_enc->state.frag_buf_offs;
fragis=_enc->state.mb_maps[_mbi][0];
ystride=_enc->state.ref_ystride[0];
@@ -763,8 +763,8 @@
ystride=_enc->state.ref_ystride[0];
frag_buf_offs=_enc->state.frag_buf_offs;
fragis=_enc->state.mb_maps[_mbi][0];
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
- ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
+ ref=_enc->state.ref_frame_data[OC_FRAME_PREV];
offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
offset_y[3]=offset_y[5]=0;
offset_y[6]=offset_y[7]=offset_y[8]=ystride;
Modified: trunk/theora/lib/state.c
===================================================================
--- trunk/theora/lib/state.c 2010-10-24 04:50:46 UTC (rev 17562)
+++ trunk/theora/lib/state.c 2010-10-25 17:40:54 UTC (rev 17563)
@@ -548,6 +548,7 @@
int yheight;
int chstride;
int cheight;
+ ptrdiff_t align;
ptrdiff_t yoffset;
ptrdiff_t coffset;
ptrdiff_t *frag_buf_offs;
@@ -563,21 +564,26 @@
vdec=!(info->pixel_fmt&2);
yhstride=info->frame_width+2*OC_UMV_PADDING;
yheight=info->frame_height+2*OC_UMV_PADDING;
- chstride=yhstride>>hdec;
+ /*Require 16-byte aligned rows in the chroma planes.*/
+ chstride=(yhstride>>hdec)+15&~15;
cheight=yheight>>vdec;
yplane_sz=yhstride*(size_t)yheight;
cplane_sz=chstride*(size_t)cheight;
yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
- ref_frame_sz=yplane_sz+2*cplane_sz;
+ /*Although we guarantee the rows of the chroma planes are a multiple of 16
+ bytes, the initial padding on the first row may only be 8 bytes.
+ Compute the offset needed to align the actual image data to a multiple of 16.*/
+ align=-coffset&15;
+ ref_frame_sz=yplane_sz+2*cplane_sz+16;
ref_frame_data_sz=_nrefs*ref_frame_sz;
/*Check for overflow.
The same caveats apply as for oc_state_frarray_init().*/
- if(yplane_sz/yhstride!=yheight||2*cplane_sz<cplane_sz||
+ if(yplane_sz/yhstride!=yheight||2*cplane_sz+16<cplane_sz||
ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
return TH_EIMPL;
}
- ref_frame_data=_ogg_malloc(ref_frame_data_sz);
+ ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16);
frag_buf_offs=_state->frag_buf_offs=
_ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
if(ref_frame_data==NULL||frag_buf_offs==NULL){
@@ -599,15 +605,15 @@
memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
sizeof(_state->ref_frame_bufs[0]));
}
+ _state->ref_frame_handle=ref_frame_data;
/*Set up the data pointers for the image buffers.*/
for(rfi=0;rfi<_nrefs;rfi++){
- _state->ref_frame_data[rfi]=ref_frame_data;
_state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
- ref_frame_data+=yplane_sz;
+ ref_frame_data+=yplane_sz+align;
_state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
ref_frame_data+=cplane_sz;
_state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
- ref_frame_data+=cplane_sz;
+ ref_frame_data+=cplane_sz+(16-align);
/*Flip the buffer upside down.
This allows us to decode Theora's bottom-up frames in their natural
order, yet return a top-down buffer with a positive stride to the user.*/
@@ -617,7 +623,7 @@
_state->ref_ystride[0]=-yhstride;
_state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
/*Initialize the fragment buffer offsets.*/
- ref_frame_data=_state->ref_frame_data[0];
+ ref_frame_data=_state->ref_frame_bufs[0][0].data;
fragi=0;
for(pli=0;pli<3;pli++){
th_img_plane *iplane;
@@ -643,19 +649,25 @@
vpix+=stride<<3;
}
}
- /*Initialize the reference frame indices.*/
+ /*Initialize the reference frame pointers and indices.*/
_state->ref_frame_idx[OC_FRAME_GOLD]=
_state->ref_frame_idx[OC_FRAME_PREV]=
_state->ref_frame_idx[OC_FRAME_GOLD_ORIG]=
_state->ref_frame_idx[OC_FRAME_PREV_ORIG]=
_state->ref_frame_idx[OC_FRAME_SELF]=
_state->ref_frame_idx[OC_FRAME_IO]=-1;
+ _state->ref_frame_data[OC_FRAME_GOLD]=
+ _state->ref_frame_data[OC_FRAME_PREV]=
+ _state->ref_frame_data[OC_FRAME_GOLD_ORIG]=
+ _state->ref_frame_data[OC_FRAME_PREV_ORIG]=
+ _state->ref_frame_data[OC_FRAME_SELF]=
+ _state->ref_frame_data[OC_FRAME_IO]=NULL;
return 0;
}
static void oc_state_ref_bufs_clear(oc_theora_state *_state){
_ogg_free(_state->frag_buf_offs);
- _ogg_free(_state->ref_frame_data[0]);
+ oc_aligned_free(_state->ref_frame_handle);
}
@@ -963,12 +975,12 @@
frag_buf_off=_state->frag_buf_offs[_fragi];
refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
- dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+ dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
if(refi==OC_FRAME_SELF)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
+ ref=_state->ref_frame_data[refi]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2(_state,
Modified: trunk/theora/lib/state.h
===================================================================
--- trunk/theora/lib/state.h 2010-10-24 04:50:46 UTC (rev 17562)
+++ trunk/theora/lib/state.h 2010-10-25 17:40:54 UTC (rev 17563)
@@ -427,12 +427,16 @@
ptrdiff_t ncoded_fragis[3];
/*The total number of coded fragments.*/
ptrdiff_t ntotal_coded_fragis;
+ /*The actual buffers used for the reference frames.*/
+ th_ycbcr_buffer ref_frame_bufs[6];
/*The index of the buffers being used for each OC_FRAME_* reference frame.*/
int ref_frame_idx[6];
- /*The actual buffers used for the reference frames.*/
- th_ycbcr_buffer ref_frame_bufs[6];
- /*The storage for the reference frame buffers.*/
+ /*The data pointer of the first plane of each OC_FRAME_* reference frame.
+ This is just ref_frame_bufs[ref_frame_idx[i]][0].data, but is cached here
+ for faster look-up.*/
unsigned char *ref_frame_data[6];
+ /*The handle used to allocate the reference frame buffers.*/
+ unsigned char *ref_frame_handle;
/*The strides for each plane in the reference frames.*/
int ref_ystride[3];
/*The number of unique border patterns.*/
Modified: trunk/theora/lib/x86/mmxstate.c
===================================================================
--- trunk/theora/lib/x86/mmxstate.c 2010-10-24 04:50:46 UTC (rev 17562)
+++ trunk/theora/lib/x86/mmxstate.c 2010-10-25 17:40:54 UTC (rev 17563)
@@ -69,12 +69,12 @@
frag_buf_off=_state->frag_buf_offs[_fragi];
refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
- dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+ dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
+ ref=_state->ref_frame_data[refi]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
Modified: trunk/theora/lib/x86_vc/mmxstate.c
===================================================================
--- trunk/theora/lib/x86_vc/mmxstate.c 2010-10-24 04:50:46 UTC (rev 17562)
+++ trunk/theora/lib/x86_vc/mmxstate.c 2010-10-25 17:40:54 UTC (rev 17563)
@@ -80,12 +80,12 @@
frag_buf_off=_state->frag_buf_offs[_fragi];
refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
- dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+ dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
+ ref=_state->ref_frame_data[refi]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
More information about the commits
mailing list