[xiph-commits] r15115 - branches/theora_multithread_decode_omp/lib/dec
piga at svn.xiph.org
piga at svn.xiph.org
Wed Jul 16 07:15:13 PDT 2008
Author: piga
Date: 2008-07-16 07:15:11 -0700 (Wed, 16 Jul 2008)
New Revision: 15115
Modified:
branches/theora_multithread_decode_omp/lib/dec/decode.c
Log:
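A pipelined implementation: the base MCU height for frames with frame_height > 256 is reduced from 128 to 64 fragment rows, and the single per-plane "omp parallel for" loop is split into two OpenMP work-sharing loops inside one parallel region (DC unprediction and reconstruction first, then loop filtering, border filling and post-processing). The stripe callback and the notstart update now sit inside that parallel region.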
Modified: branches/theora_multithread_decode_omp/lib/dec/decode.c
===================================================================
--- branches/theora_multithread_decode_omp/lib/dec/decode.c 2008-07-14 22:03:27 UTC (rev 15114)
+++ branches/theora_multithread_decode_omp/lib/dec/decode.c 2008-07-16 14:15:11 UTC (rev 15115)
@@ -1439,7 +1439,7 @@
/*If chroma is sub-sampled in the vertical direction, we have to decode two
super block rows of Y' for each super block row of Cb and Cr.*/
if (_dec->state.info.frame_height > 256) {
- _pipe->mcu_nvfrags=128<<!(_dec->state.info.pixel_fmt&2);
+ _pipe->mcu_nvfrags=64<<!(_dec->state.info.pixel_fmt&2);
} else {
_pipe->mcu_nvfrags=_dec->state.info.frame_height;
}
@@ -2137,76 +2137,89 @@
int avail_fragy_end;
avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
notdone=stripe_fragy+pipe.mcu_nvfrags<avail_fragy_end;
- #pragma omp parallel for
- for(pli=0;pli<3;pli++){
- oc_fragment_plane *fplane;
- int frag_shift;
- int pp_offset;
- int sdelay;
- int edelay;
- fplane=_dec->state.fplanes+pli;
- /*Compute the first and last fragment row of the current MCU for this
- plane.*/
- frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
- pipe.fragy0[pli]=stripe_fragy>>frag_shift;
- pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
- pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
- oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
- oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
- sdelay=edelay=0;
- if(pipe.loop_filter){
- sdelay+=notstart;
- edelay+=notdone;
- oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
- refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
- }
- /*To fill the borders, we have an additional two pixel delay, since a
- fragment in the next row could filter its top edge, using two pixels
- from a fragment in this row.
- But there's no reason to delay a full fragment between the two.*/
- oc_state_borders_fill_rows(&_dec->state,refi,pli,
- (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
- (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
- /*Out-of-loop post-processing.*/
- pp_offset=3*(pli!=0);
- if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
- /*Perform de-blocking in one plane.*/
- sdelay+=notstart;
- edelay+=notdone;
- oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
- _dec->state.ref_frame_bufs[refi],pli,
- pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
- if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
- /*Perform de-ringing in one plane.*/
- sdelay+=notstart;
- edelay+=notdone;
- oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
- pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
- }
- }
- /*If no post-processing is done, we still need to delay a row for the
- loop filter, thanks to the strange filtering order VP3 chose.*/
- else if(pipe.loop_filter){
- sdelay+=notstart;
- edelay+=notdone;
- }
- /*Compute the intersection of the available rows in all planes.
- If chroma is sub-sampled, the effect of each of its delays is
- doubled, but luma might have more post-processing filters enabled
- than chroma, so we don't know up front which one is the limiting
- factor.*/
- avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<<frag_shift);
- avail_fragy_end=OC_MINI(avail_fragy_end,
- pipe.fragy_end[pli]-edelay<<frag_shift);
+ #pragma omp parallel
+ {
+ #pragma omp for nowait
+ for(pli=0;pli<3;pli++) {
+ oc_fragment_plane *fplane;
+ int frag_shift;
+ fplane=_dec->state.fplanes+pli;
+ /*Compute the first and last fragment row of the current MCU for this
+ plane.*/
+ frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
+ pipe.fragy0[pli]=stripe_fragy>>frag_shift;
+ pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
+ pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
+ oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
+ oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
+ }
+ #pragma omp for
+ for(pli=0;pli<3;pli++){
+ int pp_offset;
+ int sdelay;
+ int edelay;
+ oc_fragment_plane *fplane;
+ int frag_shift;
+ fplane=_dec->state.fplanes+pli;
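+ /*Recompute the vertical sub-sampling shift for this plane.
+ The first and last fragment row of the current MCU were already stored in
+ pipe.fragy0[pli] and pipe.fragy_end[pli] by the first loop.*/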
+ frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
+
+ sdelay=edelay=0;
+ if(pipe.loop_filter){
+ sdelay+=notstart;
+ edelay+=notdone;
+ oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
+ refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+ }
+ /*To fill the borders, we have an additional two pixel delay, since a
+ fragment in the next row could filter its top edge, using two pixels
+ from a fragment in this row.
+ But there's no reason to delay a full fragment between the two.*/
+ oc_state_borders_fill_rows(&_dec->state,refi,pli,
+ (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
+ (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
+ /*Out-of-loop post-processing.*/
+ pp_offset=3*(pli!=0);
+ if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
+ /*Perform de-blocking in one plane.*/
+ sdelay+=notstart;
+ edelay+=notdone;
+ oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
+ _dec->state.ref_frame_bufs[refi],pli,
+ pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+ if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
+ /*Perform de-ringing in one plane.*/
+ sdelay+=notstart;
+ edelay+=notdone;
+ oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
+ pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+ }
+ }
+ /*If no post-processing is done, we still need to delay a row for the
+ loop filter, thanks to the strange filtering order VP3 chose.*/
+ else if(pipe.loop_filter){
+ sdelay+=notstart;
+ edelay+=notdone;
+ }
+ /*Compute the intersection of the available rows in all planes.
+ If chroma is sub-sampled, the effect of each of its delays is
+ doubled, but luma might have more post-processing filters enabled
+ than chroma, so we don't know up front which one is the limiting
+ factor.*/
+ avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<<frag_shift);
+ avail_fragy_end=OC_MINI(avail_fragy_end,
+ pipe.fragy_end[pli]-edelay<<frag_shift);
+ }
+ if(_dec->stripe_cb.stripe_decoded!=NULL){
+ /*Make the callback, ensuring we flip the sense of the "start" and
+ "end" of the available region upside down.*/
+ (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
+ _dec->state.fplanes[0].nvfrags-avail_fragy_end,
+ _dec->state.fplanes[0].nvfrags-avail_fragy0);
+ }
+ notstart=1;
}
- if(_dec->stripe_cb.stripe_decoded!=NULL){
- /*Make the callback, ensuring we flip the sense of the "start" and
- "end" of the available region upside down.*/
- (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
- _dec->state.fplanes[0].nvfrags-avail_fragy_end,
- _dec->state.fplanes[0].nvfrags-avail_fragy0);
- }
- notstart=1;
}
#ifdef _TH_DEBUG_
More information about the commits mailing list