[xiph-commits] r8900 - in experimental/derf/theora-exp: examples include/theora lib

tterribe at motherfish-iii.xiph.org tterribe at motherfish-iii.xiph.org
Thu Feb 10 22:43:10 PST 2005


Author: tterribe
Date: 2005-02-10 22:43:03 -0800 (Thu, 10 Feb 2005)
New Revision: 8900

Modified:
   experimental/derf/theora-exp/examples/dump_video.c
   experimental/derf/theora-exp/examples/encoder_example.c
   experimental/derf/theora-exp/examples/player_example.c
   experimental/derf/theora-exp/include/theora/theora.h
   experimental/derf/theora-exp/lib/decint.h
   experimental/derf/theora-exp/lib/decode.c
   experimental/derf/theora-exp/lib/encode.c
   experimental/derf/theora-exp/lib/info.c
   experimental/derf/theora-exp/lib/internal.c
   experimental/derf/theora-exp/lib/internal.h
   experimental/derf/theora-exp/lib/psych.c
   experimental/derf/theora-exp/lib/state.c
Log:
The final set of pipelined decode changes, and also some general clean-up.

Post-processing has been moved into the pipeline.
Fragment reconstruction has been re-organized to operate on individual planes,
 so within an MCU, we go through all the steps -- DC prediction reversal,
 fragment reconstruction, uncoded fragment copying, loop filtering, border
 extension, and out-of-loop post-processing -- on a single color plane before
 proceeding to the next.
This should give us an even smaller cache footprint, potentially keeping more
 data in the L1 cache during these steps.

Finally, a striped decoding API has been added.
Example usage has been added to the dump_video example, even though it's of no
 practical value there (other than to enable testing the API itself).

A number of minor bug fixes and warning fixes are also included (I know I should
 split them into a separate patch, but I'm too lazy).
Most of them are implicit declaration fixes, printf argument mismatches, and
 unused variable clean-ups, but a few are noteworthy:

- theora_decoder_ctl was renamed to theora_decode_ctl, to match the declaration
 in the header (and all of the other theora_decode_* functions).
I thought I'd fixed this before, but maybe the gremlins (or the crappy Berkeley
 DB) ate it.
- A minor bitrate accounting bug in inter/intra decision was fixed (the macro
 block mode bits were not being added to the inter frame score).
- One fragment's contribution to the small masking groups in psych.c was not
 being added properly due to a copy/paste error (the effect, if any, should be
 very small).
- When storing block variances while running the out-of-loop deblocking
 filter, the fragment in the next row was being obtained by adding nvfrags
 instead of nhfrags.
Fixing this should give a minor improvement in the deringing filter (and avoid
 a segfault for very tall, skinny video).



Modified: experimental/derf/theora-exp/examples/dump_video.c
===================================================================
--- experimental/derf/theora-exp/examples/dump_video.c	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/examples/dump_video.c	2005-02-11 06:43:03 UTC (rev 8900)
@@ -84,16 +84,60 @@
   got_sigint = 1;
 }
 
-static void open_video(void){
+static theora_ycbcr_buffer ycbcr;
 
+static void stripe_decoded(theora_ycbcr_buffer _dst,theora_ycbcr_buffer _src,
+ int _fragy0,int _fragy_end){
+  int pli;
+  for(pli=0;pli<3;pli++){
+    int yshift;
+    int y_end;
+    int y;
+    yshift=pli!=0&&!(ti.pixel_fmt&2);
+    y_end=_fragy_end<<3-yshift;
+    for(y=_fragy0<<3-yshift;y<y_end;y++){
+      memcpy(_dst[pli].data+y*_dst[pli].ystride,
+       _src[pli].data+y*_src[pli].ystride,_src[pli].width);
+    }
+  }
 }
 
+static void open_video(void){
+  theora_stripe_callback cb;
+  int                    pli;
+  /*Here we allocate a buffer so we can use the striped decode feature.
+    There's no real reason to do this in this application, because we want to
+     write to the file top-down, but the frame gets decoded bottom up, so we
+     have to buffer it all anyway.
+    But this illustrates how the API works.*/
+  for(pli=0;pli<3;pli++){
+    int xshift;
+    int yshift;
+    xshift=pli!=0&&!(ti.pixel_fmt&1);
+    yshift=pli!=0&&!(ti.pixel_fmt&2);
+    ycbcr[pli].data=(char *)malloc(
+     (ti.frame_width>>xshift)*(ti.frame_height>>yshift)*sizeof(char));
+    ycbcr[pli].ystride=ti.frame_width>>xshift;
+    ycbcr[pli].width=ti.frame_width>>xshift;
+    ycbcr[pli].height=ti.frame_height>>yshift;
+  }
+  /*Similarly, since ycbcr is a global, there's no real reason to pass it as
+     the context.
+    In a more object-oriented decoder, we could pass the "this" pointer
+     instead (though in C++ platform-dependent calling convention differences
+     prevent us from using a real member function pointer).*/
+  cb.ctx=ycbcr;
+  cb.stripe_decoded=(theora_stripe_decoded_func)stripe_decoded;
+  theora_decode_ctl(td,OC_DECCTL_SET_STRIPE_CB,&cb,sizeof(cb));
+}
+
 static void video_write(void){
   int pli;
   int i;
 
+  /*Uncomment the following to do normal, non-striped decoding.
   theora_ycbcr_buffer ycbcr;
-  theora_decode_ycbcr_out(td,ycbcr);
+  theora_decode_ycbcr_out(td,ycbcr);*/
 
   for(pli=0;pli<3;pli++){
     for(i=0;i<ycbcr[pli].height;i++){
@@ -251,7 +295,7 @@
   /* and now we have it all.  initialize decoders */
   if(theora_p){
     td=theora_decode_alloc(&ti,ts);
-    fprintf(stderr,"Ogg logical stream %x is Theora %dx%d %.02f fps video\nEncoded frame content is %dx%d with %dx%d offset\n",
+    fprintf(stderr,"Ogg logical stream %lx is Theora %dx%d %.02f fps video\nEncoded frame content is %dx%d with %dx%d offset\n",
             to.serialno,ti.frame_width,ti.frame_height,
             (double)ti.fps_numerator/ti.fps_denominator,
             ti.pic_width, ti.pic_height, ti.pic_x, ti.pic_y);
@@ -296,7 +340,7 @@
 
     if(!videobuf_ready ){
       /* no data yet for somebody.  Grab another page */
-      int ret=buffer_data(infile,&oy);
+      buffer_data(infile,&oy);
       while(ogg_sync_pageout(&oy,&og)>0){
         queue_page(&og);
       }

Modified: experimental/derf/theora-exp/examples/encoder_example.c
===================================================================
--- experimental/derf/theora-exp/examples/encoder_example.c	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/examples/encoder_example.c	2005-02-11 06:43:03 UTC (rev 8900)
@@ -921,7 +921,6 @@
   /* You'll go to Hell for using static variables */
   static int                 state=-1;
   static unsigned char      *yuvframe[3];
-  static int                 framenum;
   static theora_ycbcr_buffer ycbcr;
   ogg_packet                 op;
   int                        pic_sz;
@@ -930,7 +929,7 @@
   int                        c_w;
   int                        c_h;
   int                        c_sz;
-  int i, e;
+  int                        i;
 
   pic_sz=pic_w*pic_h;
   frame_c_w=frame_w/dst_c_dec_h;

Modified: experimental/derf/theora-exp/examples/player_example.c
===================================================================
--- experimental/derf/theora-exp/examples/player_example.c	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/examples/player_example.c	2005-02-11 06:43:03 UTC (rev 8900)
@@ -570,7 +570,7 @@
   /* and now we have it all.  initialize decoders */
   if(theora_p){
     td=theora_decode_alloc(&ti,ts);
-    printf("Ogg logical stream %x is Theora %dx%d %.02f fps video\n",
+    printf("Ogg logical stream %lx is Theora %dx%d %.02f fps video\n",
            to.serialno,ti.frame_width,ti.frame_height, 
            (double)ti.fps_numerator/ti.fps_denominator);
     if(ti.frame_width!=ti.pic_width || ti.frame_height!=ti.pic_height)
@@ -578,10 +578,10 @@
            ti.pic_width, ti.pic_height, ti.pic_x, ti.pic_y);
     report_colorspace(&ti);
     dump_comments(&tc);
-    theora_decoder_ctl(td,OC_DECCTL_GET_PPLEVEL_MAX,&pp_level_max,
+    theora_decode_ctl(td,OC_DECCTL_GET_PPLEVEL_MAX,&pp_level_max,
      sizeof(pp_level_max));
     pp_level=pp_level_max;
-    theora_decoder_ctl(td,OC_DECCTL_SET_PPLEVEL,&pp_level,sizeof(pp_level));
+    theora_decode_ctl(td,OC_DECCTL_SET_PPLEVEL,&pp_level,sizeof(pp_level));
     pp_inc=0;
   }else{
     /* tear down the partial theora setup */
@@ -593,7 +593,7 @@
   if(vorbis_p){
     vorbis_synthesis_init(&vd,&vi);
     vorbis_block_init(&vd,&vb);
-    fprintf(stderr,"Ogg logical stream %x is Vorbis %d channel %d Hz audio.\n",
+    fprintf(stderr,"Ogg logical stream %lx is Vorbis %d channel %ld Hz audio.\n",
             vo.serialno,vi.channels,vi.rate);
   }else{
     /* tear down the partial vorbis setup */
@@ -662,7 +662,7 @@
 
         if(pp_inc){
           pp_level+=pp_inc;
-          theora_decoder_ctl(td,OC_DECCTL_SET_PPLEVEL,&pp_level,
+          theora_decode_ctl(td,OC_DECCTL_SET_PPLEVEL,&pp_level,
            sizeof(pp_level));
           pp_inc=0;
         }
@@ -673,7 +673,7 @@
            page and compute the correct granulepos for the first packet after
            a seek or a gap.*/
         if(op.granulepos>=0){
-          theora_decoder_ctl(td,OC_DECCTL_SET_GRANPOS,&op.granulepos,
+          theora_decode_ctl(td,OC_DECCTL_SET_GRANPOS,&op.granulepos,
            sizeof(op.granulepos));
         }
         if(theora_decode_packetin(td,&op,&videobuf_granulepos)>=0){
@@ -688,7 +688,7 @@
           if(videobuf_time>=get_time())
             videobuf_ready=1;
           /*If we are too slow, reduce the pp level.*/
-          else pp_inc=pp_level>0?pp_inc=-1:0;
+          else pp_inc=pp_level>0?-1:0;
         }
 
       }else
@@ -699,7 +699,7 @@
 
     if(!videobuf_ready || !audiobuf_ready){
       /* no data yet for somebody.  Grab another page */
-      int ret=buffer_data(infile,&oy);
+      buffer_data(infile,&oy);
       while(ogg_sync_pageout(&oy,&og)>0){
         queue_page(&og);
       }

Modified: experimental/derf/theora-exp/include/theora/theora.h
===================================================================
--- experimental/derf/theora-exp/include/theora/theora.h	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/include/theora/theora.h	2005-02-11 06:43:03 UTC (rev 8900)
@@ -69,7 +69,7 @@
   OC_PF_NFORMATS
 }theora_pixel_fmt;
 
-/*theora_enc_ctl codes.
+/*theora_encode_ctl codes.
   By convention, these are even, to distinguish them from decoder control
    codes.
   Keep any experimental or vendor-specific values above 0x8000.*/
@@ -118,7 +118,7 @@
           OC_IMPL:   Not supported by this implementation.*/
 #define OC_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
 
-/*theora_dec_ctl codes.
+/*theora_decode_ctl codes.
   By convention, these are odd, to distinguish them from encoder control codes.
   Keep any experimental or vendor-specific values above 0x8000.*/
 
@@ -148,7 +148,17 @@
           OC_EINVAL: _buf_sz is not sizeof(ogg_int64_t), or the granpos is
                       negative.*/
 #define OC_DECCTL_SET_GRANPOS (5)
+/*Sets the striped decode callback function.
+  If set, this function will be called as each piece of a frame is fully
+   decoded.
+  You can pass in a theora_stripe_callback struct with stripe_decoded set to
+   NULL to disable the callbacks at any point.
 
+  arg: [in]  theora_stripe_callback: The callback parameters.
+  Return: OC_FAULT:  _dec_ctx or _buf is NULL.
+          OC_EINVAL: _buf_sz is not sizeof(theora_stripe_callback).*/
+#define OC_DECCTL_SET_STRIPE_CB (7)
+
 /*A single color plane in an image.*/
 typedef struct{
   int            width;
@@ -167,7 +177,44 @@
   Cb and Cr stand for "Chroma blue" and "Chroma red" respectively.*/
 typedef theora_img_plane theora_ycbcr_buffer[3];
 
+/*A callback function for striped decoding.
+  This is a function pointer to an application-provided function that gets
+   called each time a section of the image is fully decoded.
+  This allows the application to process the section immediately, while it is
+   still in cache.
+  Note that the frame is decoded bottom to top, so _yfrag0 will steadily
+   decrease with each call until it reaches 0, at which point the full frame is
+   decoded.
+  The number of fragment rows made available in each call depends on the pixel
+   format and the number of post-processing filters enabled, and may not even
+   be constant for the entire frame.
+  If a non-NULL _granpos pointer is passed to theora_decode_packetin(), the
+   granule position for the frame will be stored in it before the first
+   callback is made.
+  If an entire frame is dropped (a 0-byte packet), then no callbacks will be
+   made at all for that frame.
+  _ctx:       An application-provided context pointer.
+  _buf:       The image buffer for the decoded frame.
+  _yfrag0:    The Y coordinate of the first row of 8x8 fragments decoded.
+              Multiply this by 8 to obtain the pixel row number in the luma
+               plane.
+              If the chroma planes are subsampled in the Y direction, this will
+               always be divisible by two.
+  _yfrag_end: The Y coordinate of the first row of 8x8 fragments past the
+               newly decoded section.
+              If the chroma planes are subsampled in the Y direction, this will
+               always be divisible by two.
+              I.e., this section contains fragment rows
+               _yfrag0..._yfrag_end-1.*/
+typedef void (*theora_stripe_decoded_func)(void *_ctx,theora_ycbcr_buffer _buf,
+ int _yfrag0,int _yfrag_end);
+
 typedef struct{
+  void                       *ctx;
+  theora_stripe_decoded_func  stripe_decoded;
+}theora_stripe_callback;
+
+typedef struct{
   /*Bitstream version information.*/
   unsigned char      version_major;
   unsigned char      version_minor;

Modified: experimental/derf/theora-exp/lib/decint.h
===================================================================
--- experimental/derf/theora-exp/lib/decint.h	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/lib/decint.h	2005-02-11 06:43:03 UTC (rev 8900)
@@ -70,6 +70,8 @@
   int                      pp_frame_has_chroma;
   /*The buffer used for the post-processed frame.*/
   theora_ycbcr_buffer      pp_frame_buf;
+  /*The striped decode callback function.*/
+  theora_stripe_callback   stripe_cb;
 };
 
 /*Fix-ups for the libogg1 API, which returns -1 when there are insufficient

Modified: experimental/derf/theora-exp/lib/decode.c
===================================================================
--- experimental/derf/theora-exp/lib/decode.c	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/lib/decode.c	2005-02-11 06:43:03 UTC (rev 8900)
@@ -212,6 +212,8 @@
   _dec->dc_qis=NULL;
   _dec->variances=NULL;
   _dec->pp_frame_data=NULL;
+  _dec->stripe_cb.ctx=NULL;
+  _dec->stripe_cb.stripe_decoded=NULL;
 }
 
 static void oc_dec_clear(oc_dec_ctx *_dec){
@@ -1290,9 +1292,10 @@
   int *uncoded_fragis[3];
   int  fragy0[3];
   int  fragy_end[3];
+  int  ncoded_fragis[3];
+  int  nuncoded_fragis[3];
   int  pred_last[3][3];
   int  mcu_nvfrags;
-  int  cur_fragy;
   int  loop_filter;
   int  pp_level;
 }oc_dec_pipeline_state;
@@ -1308,7 +1311,6 @@
   /*If chroma is sub-sampled in the vertical direction, we have to decode two
      super block rows of Y' for each super block row of Cb and Cr.*/
   _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
-  _pipe->cur_fragy=0;
   /*Initialize the token and extra bits indices for each plane and
      coefficient.*/
   memset(_pipe->ti[0],0,sizeof(_pipe->ti[0]));
@@ -1347,103 +1349,107 @@
   }
 }
 
+/*Undo the DC prediction in a single plane of an MCU (one or two super block
+   rows).
+  As a side effect, the number of coded and uncoded fragments in this plane of
+   the MCU is also computed.*/
+static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+  /*Undo the DC prediction.*/
+  oc_fragment_plane *fplane;
+  oc_fragment       *frag;
+  int               *pred_last;
+  int                ncoded_fragis;
+  int                fragx;
+  int                fragy;
+  int                fragy0;
+  int                fragy_end;
+  /*Compute the first and last fragment row of the current MCU for this
+     plane.*/
+  fplane=_dec->state.fplanes+_pli;
+  fragy0=_pipe->fragy0[_pli];
+  fragy_end=_pipe->fragy_end[_pli];
+  frag=_dec->state.frags+fplane->froffset+(fragy0*fplane->nhfrags);
+  ncoded_fragis=0;
+  pred_last=_pipe->pred_last[_pli];
+  for(fragy=fragy0;fragy<fragy_end;fragy++){
+    for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++){
+      if(!frag->coded)continue;
+      pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc+=
+       oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
+      ncoded_fragis++;
+    }
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
+  /*Also save the number of uncoded fragments so we know how many to copy.*/
+  _pipe->nuncoded_fragis[_pli]=
+   (fragy_end-fragy0)*fplane->nhfrags-ncoded_fragis;
+}
+
 /*Reconstructs all coded fragments in a single MCU (one or two super block
    rows).
   This requires that each coded fragment have a proper macro block mode and
-   motion vector (if not in INTRA mode), and have it's raw DC value decoded.
+   motion vector (if not in INTRA mode), and have its DC value decoded, with
+   the DC prediction process reversed, and the number of coded and uncoded
+   fragments in this plane of the MCU be counted.
   The token lists for each color plane and coefficient should also be filled
    in, along with initial token offsets, extra bits offsets, and EOB run
    counts.*/
-static void oc_dec_frags_recon_mcu(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe){
-  oc_fragment *frag;
-  int          ncoded_fragis[3];
-  int          nuncoded_fragis[3];
-  int          pli;
-  /*First, undo the DC prediction.*/
-  for(pli=0;pli<3;pli++){
-    oc_fragment_plane *fplane;
-    int               *pred_last;
-    int                fragx;
-    int                fragy;
-    int                fragy0;
-    int                fragy_end;
-    int                frag_shift;
-    fplane=_dec->state.fplanes+pli;
-    /*Compute the first and last fragment row of the current MCU for this
-       plane.*/
-    frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
-    _pipe->fragy0[pli]=fragy0=_pipe->cur_fragy>>frag_shift;
-    _pipe->fragy_end[pli]=fragy_end=
-     OC_MINI(fragy0+(_pipe->mcu_nvfrags>>frag_shift),fplane->nvfrags);
-    ncoded_fragis[pli]=0;
-    pred_last=_pipe->pred_last[pli];
-    frag=_dec->state.frags+fplane->froffset+(fragy0*fplane->nhfrags);
-    for(fragy=fragy0;fragy<fragy_end;fragy++){
-      for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++){
-        if(!frag->coded)continue;
-        pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc+=
-         oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
-        ncoded_fragis[pli]++;
+static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+  /*Decode the AC coefficients.*/
+  int *ti;
+  int *ebi;
+  int *eob_runs;
+  int *coded_fragi;
+  int *coded_fragi_end;
+  ti=_pipe->ti[_pli];
+  ebi=_pipe->ebi[_pli];
+  eob_runs=_pipe->eob_runs[_pli];
+  coded_fragi_end=coded_fragi=_pipe->coded_fragis[_pli];
+  coded_fragi_end+=_pipe->ncoded_fragis[_pli];
+  for(;coded_fragi<coded_fragi_end;coded_fragi++){
+    oc_fragment    *frag;
+    oc_quant_table *iquants;
+    /*This array is made one bigger than necessary so that an invalid zero
+       run cannot cause a buffer overflow.
+      The inverse zig-zag mapping sends all out of range indices to the last
+       entry of this array, where they are ignored.*/
+    ogg_int16_t    dct_coeffs[65];
+    int            fragi;
+    int            zzi;
+    int            last_zzi;
+    fragi=*coded_fragi;
+    frag=_dec->state.frags+fragi;
+    for(zzi=0;zzi<64;){
+      int token;
+      int eb;
+      last_zzi=zzi;
+      if(eob_runs[zzi]){
+        eob_runs[zzi]--;
+        break;
       }
-    }
-    /*Also save the number of uncoded fragments so we know how many to copy.*/
-    nuncoded_fragis[pli]=(fragy_end-fragy0)*fplane->nhfrags-ncoded_fragis[pli];
-  }
-  /*Now decode the rest of the coefficients.*/
-  for(pli=0;pli<3;pli++){
-    int *ti;
-    int *ebi;
-    int *eob_runs;
-    int *coded_fragi;
-    int *coded_fragi_end;
-    ti=_pipe->ti[pli];
-    ebi=_pipe->ebi[pli];
-    eob_runs=_pipe->eob_runs[pli];
-    coded_fragi_end=coded_fragi=_pipe->coded_fragis[pli];
-    coded_fragi_end+=ncoded_fragis[pli];
-    for(;coded_fragi<coded_fragi_end;coded_fragi++){
-      oc_quant_table  *iquants;
-      /*This array is made one bigger than necessary so that an invalid zero
-         run cannot cause a buffer overflow.
-        The inverse zig-zag mapping sends all out of range indices to the last
-         entry of this array, where they are ignored.*/
-      ogg_int16_t      dct_coeffs[65];
-      int              fragi;
-      int              zzi;
-      int              last_zzi;
-      fragi=*coded_fragi;
-      frag=_dec->state.frags+fragi;
-      for(zzi=0;zzi<64;){
-        int token;
-        int eb;
-        last_zzi=zzi;
-        if(eob_runs[zzi]){
-          eob_runs[zzi]--;
-          break;
+      else{
+        int ebflag;
+        token=_dec->dct_tokens[zzi][ti[zzi]++];
+        ebflag=OC_DCT_TOKEN_EXTRA_BITS[token]!=0;
+        eb=_dec->extra_bits[zzi][ebi[zzi]]&-ebflag;
+        ebi[zzi]+=ebflag;
+        if(token<OC_NDCT_EOB_TOKEN_MAX){
+          eob_runs[zzi]=-oc_dct_token_skip(token,eb);
         }
-        else{
-          int ebflag;
-          token=_dec->dct_tokens[zzi][ti[zzi]++];
-          ebflag=OC_DCT_TOKEN_EXTRA_BITS[token]!=0;
-          eb=_dec->extra_bits[zzi][ebi[zzi]]&-ebflag;
-          ebi[zzi]+=ebflag;
-          if(token<OC_NDCT_EOB_TOKEN_MAX){
-            eob_runs[zzi]=-oc_dct_token_skip(token,eb);
-          }
-          else oc_dct_token_expand(token,eb,dct_coeffs,&zzi);
-        }
+        else oc_dct_token_expand(token,eb,dct_coeffs,&zzi);
       }
-      /*TODO: zzi should be exactly 64 here.
-        If it's not, we should report some kind of warning.*/
-      zzi=OC_MINI(zzi,64);
-      dct_coeffs[0]=(ogg_int16_t)frag->dc;
-      iquants=_dec->state.dequant_tables[frag->mbmode!=OC_MODE_INTRA][pli];
-      oc_state_frag_recon(&_dec->state,frag,pli,dct_coeffs,last_zzi,zzi,
-       iquants[_dec->state.qis[0]][0],iquants[frag->qi]);
     }
-    _pipe->coded_fragis[pli]=coded_fragi;
+    /*TODO: zzi should be exactly 64 here.
+      If it's not, we should report some kind of warning.*/
+    zzi=OC_MINI(zzi,64);
+    dct_coeffs[0]=(ogg_int16_t)frag->dc;
+    iquants=_dec->state.dequant_tables[frag->mbmode!=OC_MODE_INTRA][_pli];
+    oc_state_frag_recon(&_dec->state,frag,_pli,dct_coeffs,last_zzi,zzi,
+     iquants[_dec->state.qis[0]][0],iquants[frag->qi]);
   }
+  _pipe->coded_fragis[_pli]=coded_fragi;
   /*Right now the reconstructed MCU has only the coded blocks in it.*/
   /*TODO: We make the decision here to always copy the uncoded blocks into it
      from the reference frame.
@@ -1455,9 +1461,9 @@
      code, and the hard case (high bitrate, high resolution) is handled
      correctly.*/
   /*Copy the uncoded blocks from the previous reference frame.*/
-  for(pli=0;pli<3;pli++)_pipe->uncoded_fragis[pli]-=nuncoded_fragis[pli];
-  oc_state_frag_copy(&_dec->state,_pipe->uncoded_fragis,nuncoded_fragis,
-   OC_FRAME_SELF,OC_FRAME_PREV);
+  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
+  oc_state_frag_copy(&_dec->state,_pipe->uncoded_fragis[_pli],
+   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
 }
 
 /*Filter a horizontal block edge.*/
@@ -1552,59 +1558,80 @@
   }
 }
 
-static void oc_dec_deblock_plane(oc_dec_ctx *_dec,int _pli,
- theora_img_plane *_dst,theora_img_plane *_src){
+static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
+ theora_img_plane *_dst,theora_img_plane *_src,int _pli,int _fragy0,
+ int _fragy_end){
   oc_fragment_plane   *fplane;
   int                 *variance;
   unsigned char       *dc_qi;
   unsigned char       *dst;
   const unsigned char *src;
+  int                  notstart;
+  int                  notdone;
+  int                  froffset;
   int                  flimit;
   int                  qstep;
+  int                  y_end;
   int                  y;
   int                  x;
   _dst+=_pli;
   _src+=_pli;
-  dst=_dst->data;
-  src=_src->data;
   fplane=_dec->state.fplanes+_pli;
-  variance=_dec->variances+fplane->froffset;
-  dc_qi=_dec->dc_qis+fplane->froffset;
-  memset(variance,0,fplane->nfrags*sizeof(variance[0]));
-  for(y=0;y<4;y++){
+  froffset=fplane->froffset+_fragy0*fplane->nhfrags;
+  variance=_dec->variances+froffset;
+  dc_qi=_dec->dc_qis+froffset;
+  notstart=_fragy0>0;
+  notdone=_fragy_end<fplane->nvfrags;
+  /*We want to clear an extra row of variances, except at the end.*/
+  memset(variance+notstart*fplane->nhfrags,0,
+   (_fragy_end+notdone-_fragy0-notstart)*fplane->nhfrags*sizeof(variance[0]));
+  /*Except for the first time, we want to point to the middle of the row.*/
+  y=(_fragy0<<3)+(notstart<<2);
+  dst=_dst->data+y*_dst->ystride;
+  src=_src->data+y*_src->ystride;
+  for(;y<4;y++){
     memcpy(dst,src,_dst->width*sizeof(dst[0]));
     dst+=_dst->ystride;
     src+=_src->ystride;
   }
-  for(;y<_dst->height-8;y+=8){
-    for(x=0;x<_dst->width;x+=8){
+  /*We also want to skip the last row in the frame for this loop.*/
+  y_end=_fragy_end-!notdone<<3;
+  for(;y<y_end;y+=8){
+    qstep=_dec->pp_dc_scale[*dc_qi];
+    flimit=(qstep*3)>>2;
+    oc_filter_hedge(dst,_dst->ystride,src-_src->ystride,_src->ystride,
+     qstep,flimit,variance,variance+fplane->nhfrags);
+    variance++;
+    dc_qi++;
+    for(x=8;x<_dst->width;x+=8){
       qstep=_dec->pp_dc_scale[*dc_qi];
       flimit=(qstep*3)>>2;
       oc_filter_hedge(dst+x,_dst->ystride,src+x-_src->ystride,_src->ystride,
-       qstep,flimit,variance,variance+fplane->nvfrags);
-      if(x>0){
-        oc_filter_vedge(dst+x-(_dst->ystride<<2)-4,_dst->ystride,
-         qstep,flimit,variance-1);
-      }
+       qstep,flimit,variance,variance+fplane->nhfrags);
+      oc_filter_vedge(dst+x-(_dst->ystride<<2)-4,_dst->ystride,
+       qstep,flimit,variance-1);
       variance++;
       dc_qi++;
     }
     dst+=_dst->ystride<<3;
     src+=_src->ystride<<3;
   }
-  for(;y<_dst->height;y++){
-    memcpy(dst,src,_dst->width*sizeof(dst[0]));
-    dst+=_dst->ystride;
-    src+=_src->ystride;
+  /*And finally, handle the last row in the frame, if it's in the range.*/
+  if(!notdone){
+    for(;y<_dst->height;y++){
+      memcpy(dst,src,_dst->width*sizeof(dst[0]));
+      dst+=_dst->ystride;
+      src+=_src->ystride;
+    }
+    /*Filter the last row of vertical block edges.*/
+    dc_qi++;
+    for(x=8;x<_dst->width;x+=8){
+      qstep=_dec->pp_dc_scale[*dc_qi++];
+      flimit=(qstep*3)>>2;
+      oc_filter_vedge(dst+x-(_dst->ystride<<3)-4,_dst->ystride,
+       qstep,flimit,variance++);
+    }
   }
-  /*Filter the last row of vertical block edges.*/
-  dc_qi++;
-  for(x=8;x<_dst->width;x+=8){
-    qstep=_dec->pp_dc_scale[*dc_qi++];
-    flimit=(qstep*3)>>2;
-    oc_filter_vedge(dst+x-(_dst->ystride<<3)-4,_dst->ystride,
-     qstep,flimit,variance++);
-  }
 }
 
 static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
@@ -1730,26 +1757,30 @@
 #define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
 #define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
 
-static void oc_dec_dering_plane(oc_dec_ctx *_dec,int _pli,
- theora_img_plane *_img){
+static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,theora_img_plane *_img,
+ int _pli,int _fragy0,int _fragy_end){
   theora_img_plane  *iplane;
   oc_fragment_plane *fplane;
   oc_fragment       *frag;
   int               *variance;
-  unsigned char     *dc_qi;
   unsigned char     *idata;
   int                sthresh;
   int                strong;
+  int                froffset;
+  int                y_end;
   int                y;
   int                x;
   iplane=_img+_pli;
   fplane=_dec->state.fplanes+_pli;
-  variance=_dec->variances+fplane->froffset;
-  frag=_dec->state.frags+fplane->froffset;
+  froffset=fplane->froffset+_fragy0*fplane->nhfrags;
+  variance=_dec->variances+froffset;
+  frag=_dec->state.frags+froffset;
   strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
   sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
-  idata=iplane->data;
-  for(y=0;y<iplane->height;y+=8){
+  y=_fragy0<<3;
+  idata=iplane->data+y*iplane->ystride;
+  y_end=_fragy_end<<3;
+  for(;y<y_end;y+=8){
     for(x=0;x<iplane->width;x+=8){
       int b;
       int qi;
@@ -1785,28 +1816,8 @@
   }
 }
 
-static void oc_dec_postprocess(oc_dec_ctx *_dec,int _pp_level){
-  /*pp_level 1: Stop after updating DC quantization indices.*/
-  if(_pp_level<=OC_PP_LEVEL_TRACKDCQI)return;
-  /*Perform de-blocking in the Y plane.*/
-  oc_dec_deblock_plane(_dec,0,_dec->pp_frame_buf,
-   _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]);
-  if(_pp_level>=OC_PP_LEVEL_DERINGY){
-    oc_dec_dering_plane(_dec,0,_dec->pp_frame_buf);
-  }
-  /*If we're not processing chroma, return.*/
-  if(_pp_level<OC_PP_LEVEL_DEBLOCKC)return;
-  oc_dec_deblock_plane(_dec,1,_dec->pp_frame_buf,
-   _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]);
-  oc_dec_deblock_plane(_dec,2,_dec->pp_frame_buf,
-   _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]);
-  if(_pp_level<=OC_PP_LEVEL_DEBLOCKC)return;
-  oc_dec_dering_plane(_dec,1,_dec->pp_frame_buf);
-  oc_dec_dering_plane(_dec,2,_dec->pp_frame_buf);
-}
 
 
-
 theora_dec_ctx *theora_decode_alloc(const theora_info *_info,
  const theora_setup_info *_setup){
   oc_dec_ctx *dec;
@@ -1824,7 +1835,7 @@
   }
 }
 
-int theora_decoder_ctl(theora_dec_ctx *_dec,int _req,void *_buf,
+int theora_decode_ctl(theora_dec_ctx *_dec,int _req,void *_buf,
  size_t _buf_sz){
   switch(_req){
     case OC_DECCTL_GET_PPLEVEL_MAX:{
@@ -1855,6 +1866,15 @@
        (granpos&(1<<_dec->state.info.keyframe_granule_shift)-1);
       return 0;
     }break;
+    case OC_DECCTL_SET_STRIPE_CB:{
+      theora_stripe_callback *cb;
+      if(_dec==NULL||_buf==NULL)return OC_FAULT;
+      if(_buf_sz!=sizeof(theora_stripe_callback))return OC_EINVAL;
+      cb=(theora_stripe_callback *)_buf;
+      _dec->stripe_cb.ctx=cb->ctx;
+      _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
+      return 0;
+    }break;
     default:return OC_IMPL;
   }
 }
@@ -1868,8 +1888,11 @@
     Only proceed if we have a non-empty packet.*/
   if(_op->bytes!=0){
     oc_dec_pipeline_state pipe;
+    theora_ycbcr_buffer   stripe_buf;
+    int                   stripe_fragy;
     int                   refi;
     int                   pli;
+    int                   notstart;
     int                   notdone;
     oggpackB_readinit(&_dec->opb,_op->packet,_op->bytes);
     ret=oc_dec_frame_header_unpack(_dec);
@@ -1917,26 +1940,60 @@
     }
     oc_dec_block_qis_unpack(_dec);
     oc_dec_residual_tokens_unpack(_dec);
-    /*TODO: All of the rest of the operations -- reconstructing coded
-       fragments, copying (coded or uncoded) fragments and post-processing --
+    /*Update granule position.
+      This must be done before the striped decode callbacks so that the
+       application knows what to do with the frame data.*/
+    _dec->state.granpos=
+     (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
+     (_dec->state.curframe_num-_dec->state.keyframe_num);
+    _dec->state.curframe_num++;
+    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+    /*All of the rest of the operations -- DC prediction reversal,
+       reconstructing coded fragments, copying uncoded fragments, loop
+       filtering, extending borders, and out-of-loop post-processing --
        should be pipelined.
-      i.e., after sufficient numbers of super-block rows are reconstructed,
-       copy back fragments, then after sufficient numbers of super-block rows
-       are complete, post-process.
-      This allows us to operate on the data while it is still in cache,
-       resulting in big performance improvements.
-      An application callback to allow it to process each super-block row as it
-       is decoded is also a good idea.*/
+      I.e., DC prediction reversal, reconstruction, and uncoded fragment
+       copying are done for one or two super block rows, then loop filtering is
+       run as far as it can, then border extension, then post-processing.
+      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
+       block rows, and one chroma.
+      Otherwise, an MCU consists of one super block row from each plane.
+      Inside each MCU, we perform all of the steps on one color plane before
+       moving on to the next.
+      After reconstruction, the additional filtering stages introduce a delay
+       since they need some pixels from the next fragment row.
+      Thus the actual number of decoded rows available is slightly smaller for
+       the first MCU, and slightly larger for the last.
+
+      This entire process allows us to operate on the data while it is still in
+       cache, resulting in big performance improvements.
+      An application callback allows further application processing (blitting
+       to video memory, color conversion, etc.) to also use the data while it's
+       in cache.*/
     oc_dec_pipeline_init(_dec,&pipe);
-    do{
-      int notstart;
-      int sdelay;
-      int edelay;
-      oc_dec_frags_recon_mcu(_dec,&pipe);
-      notstart=pipe.cur_fragy>0;
-      pipe.cur_fragy+=pipe.mcu_nvfrags;
-      notdone=pipe.cur_fragy<_dec->state.fplanes[0].nvfrags;
+    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
+    notstart=0;
+    notdone=1;
+    for(stripe_fragy=notstart=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){
+      int avail_fragy0;
+      int avail_fragy_end;
+      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
+      notdone=stripe_fragy+pipe.mcu_nvfrags<avail_fragy_end;
       for(pli=0;pli<3;pli++){
+        oc_fragment_plane *fplane;
+        int                frag_shift;
+        int                pp_offset;
+        int                sdelay;
+        int                edelay;
+        fplane=_dec->state.fplanes+pli;
+        /*Compute the first and last fragment row of the current MCU for this
+           plane.*/
+        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
+        pipe.fragy0[pli]=stripe_fragy>>frag_shift;
+        pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
+         pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
+        oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
+        oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
         sdelay=edelay=0;
         if(pipe.loop_filter){
           sdelay+=notstart;
@@ -1947,20 +2004,49 @@
         /*To fill the borders, we have an additional two pixel delay, since a
            fragment in the next row could filter its top edge, using two pixels
            from a fragment in this row.
-          But there's no reason to delay a full fragment.*/
+          But there's no reason to delay a full fragment between the two.*/
         oc_state_borders_fill_rows(&_dec->state,refi,pli,
          (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
          (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
+        sdelay+=notstart;
+        edelay+=notdone;
+        /*Out-of-loop post-processing.*/
+        pp_offset=3*(pli!=0);
+        if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
+          /*Perform de-blocking in the current plane.*/
+          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
+           _dec->state.ref_frame_bufs[refi],pli,
+           pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+          if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
+            sdelay+=notstart;
+            edelay+=notdone;
+            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
+             pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+          }
+        }
+        /*Compute the intersection of the available rows in all planes.
+          If chroma is sub-sampled, the effect of each of its delays is
+           doubled, but luma might have more post-processing filters enabled
+           than chroma, so we don't know up front which one is the limiting
+           factor.*/
+        avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<<frag_shift);
+        avail_fragy_end=OC_MINI(avail_fragy_end,
+         pipe.fragy_end[pli]-edelay<<frag_shift);
       }
-      /*TODO: Out-of-loop post-processing.*/
+      if(_dec->stripe_cb.stripe_decoded!=NULL){
+        /*Make the callback, ensuring we flip the sense of the "start" and
+           "end" of the available region upside down.*/
+        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
+         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
+         _dec->state.fplanes[0].nvfrags-avail_fragy0);
+      }
+      notstart=1;
     }
-    while(notdone);
     /*Finish filling in the reference frame borders.*/
     for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
     /*Update the reference frame indices.*/
     if(_dec->state.frame_type==OC_INTRA_FRAME){
       /*The new frame becomes both the previous and gold reference frames.*/
-      _dec->state.keyframe_num=_dec->state.curframe_num;
       _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
        _dec->state.ref_frame_idx[OC_FRAME_PREV]=
        _dec->state.ref_frame_idx[OC_FRAME_SELF];
@@ -1970,17 +2056,15 @@
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
        _dec->state.ref_frame_idx[OC_FRAME_SELF];
     }
-    /*Perform out-of-loop post-processing, if enabled.*/
-    if(pipe.pp_level>OC_PP_LEVEL_TRACKDCQI){
-      oc_dec_postprocess(_dec,pipe.pp_level);
-    }
   }
-  /*Update granule position.*/
-  _dec->state.granpos=
-   (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
-   (_dec->state.curframe_num-_dec->state.keyframe_num);
-  _dec->state.curframe_num++;
-  if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+  else{
+    /*Just update the granule position and return.*/
+    _dec->state.granpos=
+     (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
+     (_dec->state.curframe_num-_dec->state.keyframe_num);
+    _dec->state.curframe_num++;
+    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+  }
   return 0;
 }
 

Modified: experimental/derf/theora-exp/lib/encode.c
===================================================================
--- experimental/derf/theora-exp/lib/encode.c	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/lib/encode.c	2005-02-11 06:43:03 UTC (rev 8900)
@@ -1452,10 +1452,8 @@
     if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue;
     /*Pull off the associated extra bits, if any, and decode the runs.*/
     ebi=_enc->nextra_bits[zzj];
-    old_eb1=OC_DCT_TOKEN_EXTRA_BITS[old_tok1]?
-     _enc->extra_bits[zzj][--ebi]:0;
-    old_eb2=OC_DCT_TOKEN_EXTRA_BITS[old_tok2]?
-     old_eb2=_enc->extra_bits[zzi][0]:0;
+    old_eb1=OC_DCT_TOKEN_EXTRA_BITS[old_tok1]?_enc->extra_bits[zzj][--ebi]:0;
+    old_eb2=OC_DCT_TOKEN_EXTRA_BITS[old_tok2]?_enc->extra_bits[zzi][0]:0;
     runl=OC_EOB_OFFS[old_tok1]+old_eb1+OC_EOB_OFFS[old_tok2]+old_eb2;
     /*We can't possibly combine these into one run.
       It might be possible to split them more optimally, but we'll just leave
@@ -2271,7 +2269,7 @@
     _enc->mv_scheme=0;
     inter_bits+=mvbitsa;
   }
-   _enc->mode_scheme_chooser.scheme_bits[
+  inter_bits+=_enc->mode_scheme_chooser.scheme_bits[
    _enc->mode_scheme_chooser.scheme_list[0]];
   /*The easiest way to count the bits needed for coded/not coded fragments is
      to code them.
@@ -2610,6 +2608,7 @@
       int *plfragis[3];
       int  ncoded;
       int  nuncoded;
+      int  pli;
       ncoded=_enc->state.ncoded_fragis[0]+_enc->state.ncoded_fragis[1]+
        _enc->state.ncoded_fragis[2];
       nuncoded=_enc->state.nfrags-ncoded;
@@ -2619,8 +2618,10 @@
         plfragis[0]=_enc->state.coded_fragis;
         plfragis[1]=plfragis[0]+_enc->state.ncoded_fragis[0];
         plfragis[2]=plfragis[1]+_enc->state.ncoded_fragis[1];
-        oc_state_frag_copy(&_enc->state,plfragis,_enc->state.ncoded_fragis,
-         OC_FRAME_PREV,OC_FRAME_SELF);
+        for(pli=0;pli<3;pli++){
+          oc_state_frag_copy(&_enc->state,plfragis[pli],
+           _enc->state.ncoded_fragis[pli],OC_FRAME_PREV,OC_FRAME_SELF,pli);
+        }
         _enc->state.ref_frame_idx[OC_FRAME_SELF]=
          _enc->state.ref_frame_idx[OC_FRAME_PREV];
       }
@@ -2628,8 +2629,10 @@
         plfragis[0]=_enc->state.uncoded_fragis-_enc->state.nuncoded_fragis[0];
         plfragis[1]=plfragis[0]-_enc->state.nuncoded_fragis[1];
         plfragis[2]=plfragis[1]-_enc->state.nuncoded_fragis[2];
-        oc_state_frag_copy(&_enc->state,plfragis,_enc->state.nuncoded_fragis,
-         OC_FRAME_SELF,OC_FRAME_PREV);
+        for(pli=0;pli<3;pli++){
+          oc_state_frag_copy(&_enc->state,plfragis[pli],
+           _enc->state.nuncoded_fragis[pli],OC_FRAME_SELF,OC_FRAME_PREV,pli);
+        }
         _enc->state.ref_frame_idx[OC_FRAME_PREV]=
          _enc->state.ref_frame_idx[OC_FRAME_SELF];
       }

Modified: experimental/derf/theora-exp/lib/info.c
===================================================================
--- experimental/derf/theora-exp/lib/info.c	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/lib/info.c	2005-02-11 06:43:03 UTC (rev 8900)
@@ -1,4 +1,5 @@
 #include <stdlib.h>
+#include <ctype.h>
 #include <string.h>
 #include "theora/theora.h"
 

Modified: experimental/derf/theora-exp/lib/internal.c
===================================================================
--- experimental/derf/theora-exp/lib/internal.c	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/lib/internal.c	2005-02-11 06:43:03 UTC (rev 8900)
@@ -107,7 +107,7 @@
            skipped in the current block.
           Otherwise, the negative of the return value indicates that number of
            blocks are to be ended.*/
-typedef (*oc_token_skip_func)(int _token,int _extra_bits);
+typedef int (*oc_token_skip_func)(int _token,int _extra_bits);
 
 /*Handles the simple end of block tokens.*/
 static int oc_token_skip_eob(int _token,int _extra_bits){

Modified: experimental/derf/theora-exp/lib/internal.h
===================================================================
--- experimental/derf/theora-exp/lib/internal.h	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/lib/internal.h	2005-02-11 06:43:03 UTC (rev 8900)
@@ -356,6 +356,9 @@
 
 int oc_state_init(oc_theora_state *_state,const theora_info *_info);
 void oc_state_clear(oc_theora_state *_state);
+void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
+ int _y0,int _yend);
+void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
 void oc_state_borders_fill(oc_theora_state *_state,int _refi);
 void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
  theora_ycbcr_buffer _img);
@@ -365,9 +368,12 @@
 void oc_state_frag_recon(oc_theora_state *_state,const oc_fragment *_frag,
  int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
-void oc_state_frag_copy(const oc_theora_state *_state,const int *_fragis[3],
- int _nfragis[3],int _dst_frame,int _src_frame);
+void oc_state_frag_copy(const oc_theora_state *_state,const int *_fragis,
+ int _nfragis,int _dst_frame,int _src_frame,int _pli);
+int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv);
 void oc_state_loop_filter(oc_theora_state *_state,int _frame);
+void oc_state_loop_filter_frag_rows(oc_theora_state *_state,int *_bv,
+ int _refi,int _pli,int _fragy0,int _fragy_end);
 #if defined(OC_DUMP_IMAGES)
 int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
  const char *_suf);

Modified: experimental/derf/theora-exp/lib/psych.c
===================================================================
--- experimental/derf/theora-exp/lib/psych.c	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/lib/psych.c	2005-02-11 06:43:03 UTC (rev 8900)
@@ -2207,7 +2207,7 @@
           group_sums[i]=_psych->mask_groups[o][mfragj][i]+
            _psych->mask_groups[o-1][mfragj][i]+
            _psych->mask_groups[o+1][mfragj][i]+
-           _psych->mask_groups[o][mfragj-1][i];
+           _psych->mask_groups[o][mfragj-1][i]+
            _psych->mask_groups[o][mfragj+1][i];
         }
         /*Mask the coefficients in this block.*/

Modified: experimental/derf/theora-exp/lib/state.c
===================================================================
--- experimental/derf/theora-exp/lib/state.c	2005-02-11 04:18:42 UTC (rev 8899)
+++ experimental/derf/theora-exp/lib/state.c	2005-02-11 06:43:03 UTC (rev 8900)
@@ -844,40 +844,36 @@
 
 /*Copies the fragments specified by the lists of fragment indices from one
    frame to another.
-  _fragis:    A pointer to a list of fragment indices for each color plane.
-  _nfragis:   The number of fragment indices to copy in each list.
+  _fragis:    A pointer to a list of fragment indices.
+  _nfragis:   The number of fragment indices to copy.
   _dst_frame: The reference frame to copy to.
-  _src_frame: The reference frame to copy from.*/
-void oc_state_frag_copy(const oc_theora_state *_state,const int *_fragis[3],
- int _nfragis[3],int _dst_frame,int _src_frame){
-  int dst_framei;
-  int dst_ystride;
-  int src_framei;
-  int src_ystride;
-  int pli;
+  _src_frame: The reference frame to copy from.
+  _pli:       The color plane the fragments lie in.*/
+void oc_state_frag_copy(const oc_theora_state *_state,const int *_fragis,
+ int _nfragis,int _dst_frame,int _src_frame,int _pli){
+  const int *fragi;
+  const int *fragi_end;
+  int        dst_framei;
+  int        dst_ystride;
+  int        src_framei;
+  int        src_ystride;
   dst_framei=_state->ref_frame_idx[_dst_frame];
   src_framei=_state->ref_frame_idx[_src_frame];
-  for(pli=0;pli<3;pli++){
-    const int *fragi;
-    const int *fragi_end;
-    dst_ystride=_state->ref_frame_bufs[dst_framei][pli].ystride;
-    src_ystride=_state->ref_frame_bufs[src_framei][pli].ystride;
-    fragi=_fragis[pli];
-    fragi_end=fragi+_nfragis[pli];
-    for(;fragi<fragi_end;fragi++){
-      oc_fragment  *frag;
-      unsigned char *dst;
-      unsigned char *src;
-      int            j;
-      int            i;
-      frag=_state->frags+*fragi;
-      dst=frag->buffer[dst_framei];
-      src=frag->buffer[src_framei];
-      for(j=0;j<8;j++){
-        for(i=0;i<8;i++)dst[i]=src[i];
-        dst+=dst_ystride;
-        src+=src_ystride;
-      }
+  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].ystride;
+  src_ystride=_state->ref_frame_bufs[src_framei][_pli].ystride;
+  fragi_end=_fragis+_nfragis;
+  for(fragi=_fragis;fragi<fragi_end;fragi++){
+    oc_fragment   *frag;
+    unsigned char *dst;
+    unsigned char *src;
+    int            j;
+    frag=_state->frags+*fragi;
+    dst=frag->buffer[dst_framei];
+    src=frag->buffer[src_framei];
+    for(j=0;j<8;j++){
+      memcpy(dst,src,sizeof(dst[0])*8);
+      dst+=dst_ystride;
+      src+=src_ystride;
     }
   }
 }
@@ -913,8 +909,7 @@
        The total array size should be 512, but this pointer should point to the
          256th entry, as that is more convenient for the filter functions.
   Return: 0 on success, or a non-zero value if no filtering need be applied.*/
-int oc_state_loop_filter_init(oc_theora_state *_state,
- int *_bv){
+int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv){
   int flimit;
   int i;
   flimit=_state->loop_filter_limits[_state->qis[0]];
@@ -989,7 +984,6 @@
   int bounding_values[512];
   int framei;
   int pli;
-  int i;
   framei=_state->ref_frame_idx[_frame];
   if(oc_state_loop_filter_init(_state,bounding_values+256))return;
   for(pli=0;pli<3;pli++){



More information about the commits mailing list