[xiph-commits] r15977 - in branches/theora-thusnelda/lib: . dec dec/x86

tterribe at svn.xiph.org tterribe at svn.xiph.org
Sat May 2 10:49:35 PDT 2009


Author: tterribe
Date: 2009-05-02 10:49:35 -0700 (Sat, 02 May 2009)
New Revision: 15977

Modified:
   branches/theora-thusnelda/lib/dec/decint.h
   branches/theora-thusnelda/lib/dec/decode.c
   branches/theora-thusnelda/lib/dec/fragment.c
   branches/theora-thusnelda/lib/dec/internal.c
   branches/theora-thusnelda/lib/dec/state.c
   branches/theora-thusnelda/lib/dec/x86/mmxidct.c
   branches/theora-thusnelda/lib/dec/x86/mmxstate.c
   branches/theora-thusnelda/lib/dec/x86/x86int.h
   branches/theora-thusnelda/lib/internal.h
Log:
Decoder modifications in preparation for merging more of the encoder and
 decoder code.
As a side effect, this speeds things up 3.2% on x86-32 and 11.1% on x86-64.


Modified: branches/theora-thusnelda/lib/dec/decint.h
===================================================================
--- branches/theora-thusnelda/lib/dec/decint.h	2009-05-01 15:22:09 UTC (rev 15976)
+++ branches/theora-thusnelda/lib/dec/decint.h	2009-05-02 17:49:35 UTC (rev 15977)
@@ -58,15 +58,15 @@
   oc_huff_node        *huff_tables[TH_NHUFFMAN_TABLES];
   /*The index of one past the last token in each plane for each coefficient.
     The final entries are the total number of tokens for each coefficient.*/
-  int                  ti0[3][64];
+  ptrdiff_t            ti0[3][64];
   /*The index of one past the last extra bits entry in each plane for each
      coefficient.
     The final entries are the total number of extra bits entries for each
      coefficient.*/
-  int                  ebi0[3][64];
+  ptrdiff_t            ebi0[3][64];
   /*The number of outstanding EOB runs at the start of each coefficient in each
      plane.*/
-  int                  eob_runs[3][64];
+  ptrdiff_t            eob_runs[3][64];
   /*The DCT token lists.*/
   unsigned char      **dct_tokens;
   /*The extra bits associated with DCT tokens.*/

Modified: branches/theora-thusnelda/lib/dec/decode.c
===================================================================
--- branches/theora-thusnelda/lib/dec/decode.c	2009-05-01 15:22:09 UTC (rev 15976)
+++ branches/theora-thusnelda/lib/dec/decode.c	2009-05-02 17:49:35 UTC (rev 15977)
@@ -47,7 +47,7 @@
 
 /*The mode alphabets for the various mode coding schemes.
   Scheme 0 uses a custom alphabet, which is not stored in this table.*/
-static const int OC_MODE_ALPHABETS[7][OC_NMODES]={
+static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
   /*Last MV dominates */
   {
     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
@@ -193,9 +193,9 @@
   _dec->variances=NULL;
   _dec->pp_frame_data=NULL;
   _dec->telemetry_frame_data=NULL;
-  _dec->telemetry = 0;
-  _dec->telemetry_mv = 0;
-  _dec->telemetry_mbmode = 0;
+  _dec->telemetry=0;
+  _dec->telemetry_mv=0;
+  _dec->telemetry_mbmode=0;
   _dec->stripe_cb.ctx=NULL;
   _dec->stripe_cb.stripe_decoded=NULL;
   return 0;
@@ -222,19 +222,19 @@
   /*Read in the frame type (I or P).*/
   theorapackB_read1(&_dec->opb,&val);
   _dec->state.frame_type=(int)val;
-  /*Read in the current qi.*/
+  /*Read in the qi list.*/
   theorapackB_read(&_dec->opb,6,&val);
-  _dec->state.qis[0]=(int)val;
+  _dec->state.qis[0]=(unsigned char)val;
   theorapackB_read1(&_dec->opb,&val);
   if(!val)_dec->state.nqis=1;
   else{
     theorapackB_read(&_dec->opb,6,&val);
-    _dec->state.qis[1]=(int)val;
+    _dec->state.qis[1]=(unsigned char)val;
     theorapackB_read1(&_dec->opb,&val);
     if(!val)_dec->state.nqis=2;
     else{
       theorapackB_read(&_dec->opb,6,&val);
-      _dec->state.qis[2]=(int)val;
+      _dec->state.qis[2]=(unsigned char)val;
       _dec->state.nqis=3;
     }
   }
@@ -242,7 +242,8 @@
     /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
       Most of the other unused bits in the VP3 headers were eliminated.
       I don't know why these remain.*/
-    /* I wanted to eliminate wasted bits, but not all config wiggle room --Monty */
+    /*I wanted to eliminate wasted bits, but not all config wiggle room
+       --Monty.*/
     theorapackB_read(&_dec->opb,3,&val);
     if(val!=0)return TH_EIMPL;
   }
@@ -252,33 +253,37 @@
 /*Mark all fragments as coded and in OC_MODE_INTRA.
   This also builds up the coded fragment list (in coded order), and clears the
    uncoded fragment list.
-  It does not update the coded macro block list, as that is not used when
-   decoding INTRA frames.*/
+  It does not update the coded macro block list nor the super block flags, as
+   those are not used when decoding INTRA frames.*/
 static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
-  oc_sb *sb;
-  oc_sb *sb_end;
-  int    pli;
-  int    ncoded_fragis;
-  int    prev_ncoded_fragis;
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  oc_fragment       *frags;
+  ptrdiff_t         *coded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          prev_ncoded_fragis;
+  unsigned           nsbs;
+  unsigned           sbi;
+  int                pli;
+  coded_fragis=_dec->state.coded_fragis;
   prev_ncoded_fragis=ncoded_fragis=0;
-  sb=sb_end=_dec->state.sbs;
+  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+  sb_flags=_dec->state.sb_flags;
+  frags=_dec->state.frags;
+  sbi=nsbs=0;
   for(pli=0;pli<3;pli++){
-    const oc_fragment_plane *fplane;
-    fplane=_dec->state.fplanes+pli;
-    sb_end+=fplane->nsbs;
-    for(;sb<sb_end;sb++){
+    nsbs+=_dec->state.fplanes[pli].nsbs;
+    for(;sbi<nsbs;sbi++){
       int quadi;
-      for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
         int bi;
         for(bi=0;bi<4;bi++){
-          int fragi;
-          fragi=sb->map[quadi][bi];
+          ptrdiff_t fragi;
+          fragi=sb_maps[sbi][quadi][bi];
           if(fragi>=0){
-            oc_fragment *frag;
-            frag=_dec->state.frags+fragi;
-            frag->coded=1;
-            frag->mbmode=OC_MODE_INTRA;
-            _dec->state.coded_fragis[ncoded_fragis++]=fragi;
+            frags[fragi].coded=1;
+            frags[fragi].mb_mode=OC_MODE_INTRA;
+            coded_fragis[ncoded_fragis++]=fragi;
           }
         }
       }
@@ -289,33 +294,34 @@
   }
 }
 
-/*Decodes the bit flags for whether or not each super block is partially coded
+/*Decodes the bit flags indicating whether each super block is partially coded
    or not.
   Return: The number of partially coded super blocks.*/
 static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
-  oc_sb *sb;
-  oc_sb *sb_end;
-  long   val;
-  int    flag;
-  int    npartial;
-  int    run_count;
+  oc_sb_flags *sb_flags;
+  unsigned     nsbs;
+  unsigned     sbi;
+  unsigned     npartial;
+  unsigned     run_count;
+  long         val;
+  int          flag;
   theorapackB_read1(&_dec->opb,&val);
   flag=(int)val;
-  sb=_dec->state.sbs;
-  sb_end=sb+_dec->state.nsbs;
-  run_count=npartial=0;
-  while(sb<sb_end){
+  sb_flags=_dec->state.sb_flags;
+  nsbs=_dec->state.nsbs;
+  sbi=run_count=npartial=0;
+  while(sbi<nsbs){
     int full_run;
     run_count=oc_sb_run_unpack(&_dec->opb);
     full_run=run_count>=4129;
     do{
-      sb->coded_partially=flag;
-      sb->coded_fully=0;
+      sb_flags[sbi].coded_partially=flag;
+      sb_flags[sbi].coded_fully=0;
       npartial+=flag;
-      sb++;
+      sbi++;
     }
-    while(--run_count>0&&sb<sb_end);
-    if(full_run&&sb<sb_end){
+    while(--run_count>0&&sbi<nsbs);
+    if(full_run&&sbi<nsbs){
       theorapackB_read1(&_dec->opb,&val);
       flag=(int)val;
     }
@@ -332,27 +338,28 @@
    non-partially-coded super block.
   This function returns nothing; it only records the flag on each such block.
 static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
-  oc_sb *sb;
-  oc_sb *sb_end;
-  long   val;
-  int    flag;
-  int    run_count;
-  sb=_dec->state.sbs;
-  sb_end=sb+_dec->state.nsbs;
+  oc_sb_flags *sb_flags;
+  unsigned     nsbs;
+  unsigned     sbi;
+  unsigned     run_count;
+  long         val;
+  int          flag;
+  sb_flags=_dec->state.sb_flags;
+  nsbs=_dec->state.nsbs;
   /*Skip partially coded super blocks.*/
-  for(;sb->coded_partially;sb++);
+  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
   theorapackB_read1(&_dec->opb,&val);
   flag=(int)val;
-  while(sb<sb_end){
+  while(sbi<nsbs){
     int full_run;
     run_count=oc_sb_run_unpack(&_dec->opb);
     full_run=run_count>=4129;
-    for(;sb<sb_end;sb++){
-      if(sb->coded_partially)continue;
+    for(;sbi<nsbs;sbi++){
+      if(sb_flags[sbi].coded_partially)continue;
       if(run_count--<=0)break;
-      sb->coded_fully=flag;
+      sb_flags[sbi].coded_fully=flag;
     }
-    if(full_run&&sb<sb_end){
+    if(full_run&&sbi<nsbs){
       theorapackB_read1(&_dec->opb,&val);
       flag=(int)val;
     }
@@ -363,17 +370,20 @@
 }
 
 static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
-  oc_sb *sb;
-  oc_sb *sb_end;
-  long   val;
-  int    npartial;
-  int    pli;
-  int    flag;
-  int    run_count;
-  int    ncoded_fragis;
-  int    prev_ncoded_fragis;
-  int    nuncoded_fragis;
-  int    prev_nuncoded_fragis;
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  oc_fragment       *frags;
+  unsigned           nsbs;
+  unsigned           sbi;
+  unsigned           npartial;
+  long               val;
+  int                pli;
+  int                flag;
+  int                run_count;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          prev_ncoded_fragis;
+  ptrdiff_t          nuncoded_fragis;
+  ptrdiff_t          prev_nuncoded_fragis;
   npartial=oc_dec_partial_sb_flags_unpack(_dec);
   if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
   if(npartial>0){
@@ -381,35 +391,37 @@
     flag=!(int)val;
   }
   else flag=0;
-  run_count=0;
+  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+  sb_flags=_dec->state.sb_flags;
+  frags=_dec->state.frags;
+  sbi=nsbs=run_count=0;
   prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0;
-  sb=sb_end=_dec->state.sbs;
   for(pli=0;pli<3;pli++){
     const oc_fragment_plane *fplane;
     fplane=_dec->state.fplanes+pli;
-    sb_end+=fplane->nsbs;
-    for(;sb<sb_end;sb++){
+    nsbs+=fplane->nsbs;
+    for(;sbi<nsbs;sbi++){
       int quadi;
-      for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
         int bi;
         for(bi=0;bi<4;bi++){
-          int fragi;
-          fragi=sb->map[quadi][bi];
+          ptrdiff_t fragi;
+          fragi=sb_maps[sbi][quadi][bi];
           if(fragi>=0){
-            oc_fragment *frag;
-            frag=_dec->state.frags+fragi;
-            if(sb->coded_fully)frag->coded=1;
-            else if(!sb->coded_partially)frag->coded=0;
+            int coded;
+            if(sb_flags[sbi].coded_fully)coded=1;
+            else if(!sb_flags[sbi].coded_partially)coded=0;
             else{
               if(run_count<=0){
                 run_count=oc_block_run_unpack(&_dec->opb);
                 flag=!flag;
               }
               run_count--;
-              frag->coded=flag;
+              coded=flag;
             }
-            if(frag->coded)_dec->state.coded_fragis[ncoded_fragis++]=fragi;
+            if(coded)_dec->state.coded_fragis[ncoded_fragis++]=fragi;
             else *(_dec->state.uncoded_fragis-++nuncoded_fragis)=fragi;
+            frags[fragi].coded=coded;
           }
         }
       }
@@ -445,12 +457,14 @@
 
 /*Unpacks the list of macro block modes for INTER frames.*/
 static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
+  const oc_mb_map     *mb_maps;
+  signed char         *mb_modes;
+  const unsigned char *alphabet;
+  unsigned char        scheme0_alphabet[8];
   oc_mode_unpack_func  mode_unpack;
-  oc_mb               *mb;
-  oc_mb               *mb_end;
-  const int           *alphabet;
+  size_t               nmbs;
+  size_t               mbi;
   long                 val;
-  int                  scheme0_alphabet[8];
   int                  mode_scheme;
   theorapackB_read(&_dec->opb,3,&val);
   mode_scheme=(int)val;
@@ -471,18 +485,22 @@
   else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
   if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
   else mode_unpack=oc_vlc_mode_unpack;
-  mb=_dec->state.mbs;
-  mb_end=mb+_dec->state.nmbs;
-  for(;mb<mb_end;mb++){
-    if(mb->mode!=OC_MODE_INVALID){
+  mb_modes=_dec->state.mb_modes;
+  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
+  nmbs=_dec->state.nmbs;
+  for(mbi=0;mbi<nmbs;mbi++){
+    if(mb_modes[mbi]!=OC_MODE_INVALID){
       int bi;
+      /*Check for a coded luma block in this macro block.*/
       for(bi=0;bi<4;bi++){
-        int fragi;
-        fragi=mb->map[0][bi];
+        ptrdiff_t fragi;
+        fragi=mb_maps[mbi][0][bi];
         if(fragi>=0&&_dec->state.frags[fragi].coded)break;
       }
-      if(bi<4)mb->mode=alphabet[(*mode_unpack)(&_dec->opb)];
-      else mb->mode=OC_MODE_INTER_NOMV;
+      /*We found one, decode a mode.*/
+      if(bi<4)mb_modes[mbi]=alphabet[(*mode_unpack)(&_dec->opb)];
+      /*There were none: INTER_NOMV is forced.*/
+      else mb_modes[mbi]=OC_MODE_INTER_NOMV;
     }
   }
 }
@@ -493,7 +511,8 @@
 
 static int oc_vlc_mv_comp_unpack(oggpack_buffer *_opb){
   long bits;
-  int  mvsigned[2];
+  int  mask;
+  int  mv;
   theorapackB_read(_opb,3,&bits);
   switch(bits){
     case  0:return 0;
@@ -501,40 +520,45 @@
     case  2:return -1;
     case  3:
     case  4:{
-      mvsigned[0]=(int)(bits-1);
+      mv=(int)(bits-1);
       theorapackB_read1(_opb,&bits);
     }break;
     /*case  5:
     case  6:
     case  7:*/
     default:{
-      mvsigned[0]=1<<bits-3;
+      mv=1<<bits-3;
       theorapackB_read(_opb,bits-2,&bits);
-      mvsigned[0]+=(int)(bits>>1);
+      mv+=(int)(bits>>1);
       bits&=1;
     }break;
   }
-  mvsigned[1]=-mvsigned[0];
-  return mvsigned[bits];
+  mask=-(int)bits;
+  return (mv+mask)^mask;
 }
 
 static int oc_clc_mv_comp_unpack(oggpack_buffer *_opb){
   long bits;
-  int  mvsigned[2];
+  int  mask;
+  int  mv;
   theorapackB_read(_opb,6,&bits);
-  mvsigned[0]=bits>>1;
-  mvsigned[1]=-mvsigned[0];
-  return mvsigned[bits&1];
+  mv=(int)bits>>1;
+  mask=-((int)bits&1);
+  return (mv+mask)^mask;
 }
 
 /*Unpacks the list of motion vectors for INTER frames, and propagates the macro
    block modes and motion vectors to the individual fragments.*/
 static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
+  const oc_mb_map        *mb_maps;
+  const signed char      *mb_modes;
   oc_set_chroma_mvs_func  set_chroma_mvs;
   oc_mv_comp_unpack_func  mv_comp_unpack;
-  oc_mb                  *mb;
-  oc_mb                  *mb_end;
-  const int              *map_idxs;
+  oc_fragment            *frags;
+  oc_mv                  *frag_mvs;
+  size_t                  nmbs;
+  size_t                  mbi;
+  const unsigned char    *map_idxs;
   long                    val;
   int                     map_nidxs;
   oc_mv                   last_mv[2];
@@ -545,97 +569,98 @@
   map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
   map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
   memset(last_mv,0,sizeof(last_mv));
-  mb=_dec->state.mbs;
-  mb_end=mb+_dec->state.nmbs;
-
-  for(;mb<mb_end;mb++)if(mb->mode!=OC_MODE_INVALID){
-    oc_fragment *frag;
-    oc_mv        mbmv;
-    int          coded[13];
-    int          codedi;
-    int          ncoded;
-    int          mapi;
-    int          mapii;
-    int          fragi;
+  frags=_dec->state.frags;
+  frag_mvs=_dec->state.frag_mvs;
+  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
+  mb_modes=_dec->state.mb_modes;
+  nmbs=_dec->state.nmbs;
+  for(mbi=0;mbi<nmbs;mbi++){
     int          mb_mode;
-    /*Search for at least one coded fragment.*/
-    ncoded=mapii=0;
-    do{
-      mapi=map_idxs[mapii];
-      fragi=mb->map[mapi>>2][mapi&3];
-      if(fragi>=0&&_dec->state.frags[fragi].coded)coded[ncoded++]=mapi;
-    }
-    while(++mapii<map_nidxs);
-    if(ncoded<=0)continue;
-    mb_mode=mb->mode;
-    switch(mb_mode){
-      case OC_MODE_INTER_MV_FOUR:{
-        oc_mv       lbmvs[4];
-        int         bi;
-        /*Mark the tail of the list, so we don't accidentally go past it.*/
-        coded[ncoded]=-1;
-        for(bi=codedi=0;bi<4;bi++){
-          if(coded[codedi]==bi){
-            codedi++;
-            frag=_dec->state.frags+mb->map[0][bi];
-            frag->mbmode=mb_mode;
-            frag->mv[0]=lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-            frag->mv[1]=lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+    mb_mode=mb_modes[mbi];
+    if(mb_mode!=OC_MODE_INVALID){
+      oc_mv        mbmv;
+      ptrdiff_t    fragi;
+      int          coded[13];
+      int          codedi;
+      int          ncoded;
+      int          mapi;
+      int          mapii;
+      /*Search for at least one coded fragment.*/
+      ncoded=mapii=0;
+      do{
+        mapi=map_idxs[mapii];
+        fragi=mb_maps[mbi][mapi>>2][mapi&3];
+        if(fragi>=0&&_dec->state.frags[fragi].coded)coded[ncoded++]=mapi;
+      }
+      while(++mapii<map_nidxs);
+      if(ncoded<=0)continue;
+      switch(mb_mode){
+        case OC_MODE_INTER_MV_FOUR:{
+          oc_mv       lbmvs[4];
+          int         bi;
+          /*Mark the tail of the list, so we don't accidentally go past it.*/
+          coded[ncoded]=-1;
+          for(bi=codedi=0;bi<4;bi++){
+            if(coded[codedi]==bi){
+              codedi++;
+              fragi=mb_maps[mbi][0][bi];
+              frags[fragi].mb_mode=mb_mode;
+              lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+              lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+              memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi]));
+            }
+            else lbmvs[bi][0]=lbmvs[bi][1]=0;
           }
-          else lbmvs[bi][0]=lbmvs[bi][1]=0;
-        }
-        if(codedi>0){
+          if(codedi>0){
+            last_mv[1][0]=last_mv[0][0];
+            last_mv[1][1]=last_mv[0][1];
+            last_mv[0][0]=lbmvs[coded[codedi-1]][0];
+            last_mv[0][1]=lbmvs[coded[codedi-1]][1];
+          }
+          if(codedi<ncoded){
+            (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
+            for(;codedi<ncoded;codedi++){
+              mapi=coded[codedi];
+              bi=mapi&3;
+              fragi=mb_maps[mbi][mapi>>2][bi];
+              frags[fragi].mb_mode=mb_mode;
+              memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi]));
+            }
+          }
+        }break;
+        case OC_MODE_INTER_MV:{
           last_mv[1][0]=last_mv[0][0];
           last_mv[1][1]=last_mv[0][1];
-          last_mv[0][0]=lbmvs[coded[codedi-1]][0];
-          last_mv[0][1]=lbmvs[coded[codedi-1]][1];
+          mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+          mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+        }break;
+        case OC_MODE_INTER_MV_LAST:{
+          mbmv[0]=last_mv[0][0];
+          mbmv[1]=last_mv[0][1];
+        }break;
+        case OC_MODE_INTER_MV_LAST2:{
+          mbmv[0]=last_mv[1][0];
+          mbmv[1]=last_mv[1][1];
+          last_mv[1][0]=last_mv[0][0];
+          last_mv[1][1]=last_mv[0][1];
+          last_mv[0][0]=mbmv[0];
+          last_mv[0][1]=mbmv[1];
+        }break;
+        case OC_MODE_GOLDEN_MV:{
+          mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+          mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+        }break;
+        default:mbmv[0]=mbmv[1]=0;break;
+      }
+      /*4MV mode fills in the fragments itself.
+        For all other modes we can use this common code.*/
+      if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+        for(codedi=0;codedi<ncoded;codedi++){
+          mapi=coded[codedi];
+          fragi=mb_maps[mbi][mapi>>2][mapi&3];
+          frags[fragi].mb_mode=mb_mode;
+          memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv));
         }
-        if(codedi<ncoded){
-          (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
-          for(;codedi<ncoded;codedi++){
-            mapi=coded[codedi];
-            bi=mapi&3;
-            frag=_dec->state.frags+mb->map[mapi>>2][bi];
-            frag->mbmode=mb_mode;
-            frag->mv[0]=cbmvs[bi][0];
-            frag->mv[1]=cbmvs[bi][1];
-          }
-        }
-      }break;
-      case OC_MODE_INTER_MV:{
-        last_mv[1][0]=last_mv[0][0];
-        last_mv[1][1]=last_mv[0][1];
-        mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-        mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-      }break;
-      case OC_MODE_INTER_MV_LAST:{
-        mbmv[0]=last_mv[0][0];
-        mbmv[1]=last_mv[0][1];
-      }break;
-      case OC_MODE_INTER_MV_LAST2:{
-        mbmv[0]=last_mv[1][0];
-        mbmv[1]=last_mv[1][1];
-        last_mv[1][0]=last_mv[0][0];
-        last_mv[1][1]=last_mv[0][1];
-        last_mv[0][0]=mbmv[0];
-        last_mv[0][1]=mbmv[1];
-      }break;
-      case OC_MODE_GOLDEN_MV:{
-        mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-        mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-      }break;
-      default:mbmv[0]=mbmv[1]=0;break;
-    }
-    /*4MV mode fills in the fragments itself.
-      For all other modes we can use this common code.*/
-    if(mb_mode!=OC_MODE_INTER_MV_FOUR){
-      for(codedi=0;codedi<ncoded;codedi++){
-        mapi=coded[codedi];
-        fragi=mb->map[mapi>>2][mapi&3];
-        frag=_dec->state.frags+fragi;
-        frag->mbmode=mb_mode;
-        frag->mv[0]=mbmv[0];
-        frag->mv[1]=mbmv[1];
       }
     }
   }
@@ -643,9 +668,9 @@
 
 static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
   oc_fragment *frag;
-  int         *coded_fragi;
-  int         *coded_fragi_end;
-  int          ncoded_fragis;
+  ptrdiff_t   *coded_fragi;
+  ptrdiff_t   *coded_fragi_end;
+  ptrdiff_t    ncoded_fragis;
   ncoded_fragis=_dec->state.ncoded_fragis[0]+
    _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
   if(ncoded_fragis<=0)return;
@@ -730,7 +755,8 @@
 
 
 
-/*Returns the decoded value of the given token.
+/*Returns the decoded value of the first coefficient produced by the given
+   token.
   It CANNOT be called for any of the EOB tokens.
   _token:      The token value to decode.
   _extra_bits: The extra bits attached to this token.
@@ -776,7 +802,7 @@
   return (VAL_CAT_OFFS[cati]+(_extra_bits&VAL_CAT_MASKS[cati])+mask)^mask;
 }
 
-/*A jump table for compute the first coefficient value the given token value
+/*A jump table for computing the first coefficient value the given token value
    represents.*/
 static const oc_token_dec1val_func OC_TOKEN_DEC1VAL_TABLE[TH_NDCT_TOKENS-
  OC_NDCT_EOB_TOKEN_MAX]={
@@ -807,7 +833,8 @@
   (oc_token_dec1val_func)oc_token_dec1val_zrl
 };
 
-/*Returns the decoded value of the given token.
+/*Returns the decoded value of the first coefficient produced by the given
+   token.
   It CANNOT be called for any of the EOB tokens.
   _token:      The token value to decode.
   _extra_bits: The extra bits attached to this token.
@@ -825,19 +852,21 @@
                 each coefficient.
                This is updated as EOB tokens and zero run tokens are decoded.
   Return: The length of any outstanding EOB run.*/
-static int oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[3],
- int _ntoks_left[3][64]){
-  long  val;
-  int  *coded_fragi;
-  int  *coded_fragi_end;
-  int   run_counts[64];
-  int   cfi;
-  int   eobi;
-  int   eobs;
-  int   ti;
-  int   ebi;
-  int   pli;
-  int   rli;
+static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[3],
+ ptrdiff_t _ntoks_left[3][64]){
+  oc_fragment     *frags;
+  const ptrdiff_t *coded_fragi;
+  const ptrdiff_t *coded_fragi_end;
+  ptrdiff_t        run_counts[64];
+  ptrdiff_t        cfi;
+  ptrdiff_t        eobi;
+  ptrdiff_t        eobs;
+  ptrdiff_t        ti;
+  ptrdiff_t        ebi;
+  long             val;
+  int              pli;
+  int              rli;
+  frags=_dec->state.frags;
   eobs=0;
   ti=ebi=0;
   coded_fragi_end=coded_fragi=_dec->state.coded_fragis;
@@ -847,7 +876,7 @@
     _dec->eob_runs[pli][0]=eobs;
     /*Continue any previous EOB run, if there was one.*/
     for(eobi=eobs;eobi-->0&&coded_fragi<coded_fragi_end;){
-      _dec->state.frags[*coded_fragi++].dc=0;
+      frags[*coded_fragi++].dc=0;
     }
     cfi=0;
     while(eobs<_ntoks_left[pli][0]-cfi){
@@ -870,15 +899,13 @@
       skip=oc_dct_token_skip(token,eb);
       if(skip<0){
         eobs=eobi=-skip;
-        while(eobi-->0&&coded_fragi<coded_fragi_end){
-          _dec->state.frags[*coded_fragi++].dc=0;
-        }
+        while(eobi-->0&&coded_fragi<coded_fragi_end)frags[*coded_fragi++].dc=0;
       }
       else{
         run_counts[skip-1]++;
         cfi++;
         eobs=0;
-        _dec->state.frags[*coded_fragi++].dc=oc_dct_token_dec1val(token,eb);
+        frags[*coded_fragi++].dc=oc_dct_token_dec1val(token,eb);
       }
     }
     _dec->ti0[pli][0]=ti;
@@ -909,14 +936,14 @@
                 coefficients.
   Return: The length of any outstanding EOB run.*/
 static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[3],
- int _ntoks_left[3][64],int _eobs){
-  long val;
-  int  run_counts[64];
-  int  cfi;
-  int  ti;
-  int  ebi;
-  int  pli;
-  int  rli;
+ ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
+  ptrdiff_t run_counts[64];
+  ptrdiff_t cfi;
+  ptrdiff_t ti;
+  ptrdiff_t ebi;
+  long      val;
+  int       pli;
+  int       rli;
   ti=ebi=0;
   for(pli=0;pli<3;pli++){
     memset(run_counts,0,sizeof(run_counts));
@@ -982,23 +1009,22 @@
    each of the 64 coefficients, instead of a counter for every fragment to
    determine where the next token goes.
 
-  Actually, we use 3 counters per coefficient, one for each color plane, so we
+  We actually use 3 counters per coefficient, one for each color plane, so we
    can decode all color planes simultaneously.
-
   This lets color conversion, etc., be done as soon as a full MCU (one or
    two super block rows) is decoded, while the image data is still in cache.*/
 
 static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
-  static const int OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
-  long val;
-  int  ntoks_left[3][64];
-  int  huff_idxs[3];
-  int  pli;
-  int  zzi;
-  int  hgi;
-  int  huffi_y;
-  int  huffi_c;
-  int  eobs;
+  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
+  ptrdiff_t  ntoks_left[3][64];
+  int        huff_idxs[3];
+  ptrdiff_t  eobs;
+  long       val;
+  int        pli;
+  int        zzi;
+  int        hgi;
+  int        huffi_y;
+  int        huffi_c;
   for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
     ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
   }
@@ -1024,7 +1050,7 @@
   }
   /*TODO: eobs should be exactly zero, or 4096 or greater.
     The second case occurs when an EOB run of size zero is encountered, which
-     gets treated as an infinite EOB run (where infinity is INT_MAX).
+     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
     If neither of these conditions holds, then a warning should be issued.*/
 }
 
@@ -1037,87 +1063,81 @@
   _token:      The token value to expand.
   _extra_bits: The extra bits associated with the token.
   _dct_coeffs: The current list of coefficients, in zig-zag order.
-  _zzi:        A pointer to the zig-zag index of the next coefficient to write
-                to.
-               This is updated before the function returns.*/
-typedef void (*oc_token_expand_func)(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi);
+  _zzi:        The zig-zag index of the next coefficient to write to.
+  Return: The updated index of the next coefficient to write to.*/
+typedef int (*oc_token_expand_func)(int _token,int _extra_bits,
+ ogg_int16_t _dct_coeffs[128],int _zzi);
 
 /*Expands a zero run token.*/
-static void oc_token_expand_zrl(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  int zzi;
-  zzi=*_zzi;
-  do _dct_coeffs[zzi++]=0;
+static int oc_token_expand_zrl(int _token,int _extra_bits,
+ ogg_int16_t _dct_coeffs[128],int _zzi){
+  do _dct_coeffs[_zzi++]=0;
   while(_extra_bits-->0);
-  *_zzi=zzi;
+  return _zzi;
 }
 
 /*Expands a constant, single-value token.*/
-static void oc_token_expand_const(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  _dct_coeffs[(*_zzi)++]=(ogg_int16_t)oc_token_dec1val_const(_token);
+static int oc_token_expand_const(int _token,int _extra_bits,
+ ogg_int16_t _dct_coeffs[128],int _zzi){
+  _dct_coeffs[_zzi++]=(ogg_int16_t)oc_token_dec1val_const(_token);
+  return _zzi;
 }
 
 /*Expands category 2 single-valued tokens.*/
-static void oc_token_expand_cat2(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  _dct_coeffs[(*_zzi)++]=
-   (ogg_int16_t)oc_token_dec1val_cat2(_token,_extra_bits);
+static int oc_token_expand_cat2(int _token,int _extra_bits,
+ ogg_int16_t _dct_coeffs[128],int _zzi){
+  _dct_coeffs[_zzi++]=(ogg_int16_t)oc_token_dec1val_cat2(_token,_extra_bits);
+  return _zzi;
 }
 
 /*Expands category 3 through 8 single-valued tokens.*/
-static void oc_token_expand_cati(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  _dct_coeffs[(*_zzi)++]=
-   (ogg_int16_t)oc_token_dec1val_cati(_token,_extra_bits);
+static int oc_token_expand_cati(int _token,int _extra_bits,
+ ogg_int16_t _dct_coeffs[128],int _zzi){
+  _dct_coeffs[_zzi++]=(ogg_int16_t)oc_token_dec1val_cati(_token,_extra_bits);
+  return _zzi;
 }
 
 /*Expands a category 1a zero run/value combo token.*/
-static void oc_token_expand_run_cat1a(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  int zzi;
+static int oc_token_expand_run_cat1a(int _token,int _extra_bits,
+ ogg_int16_t _dct_coeffs[128],int _zzi){
   int rl;
-  zzi=*_zzi;
   /*LOOP VECTORIZES.*/
-  for(rl=_token-OC_DCT_RUN_CAT1A+1;rl-->0;)_dct_coeffs[zzi++]=0;
-  _dct_coeffs[zzi++]=(ogg_int16_t)(1-(_extra_bits<<1));
-  *_zzi=zzi;
+  for(rl=_token-OC_DCT_RUN_CAT1A+1;rl-->0;)_dct_coeffs[_zzi++]=0;
+  _dct_coeffs[_zzi++]=(ogg_int16_t)(1-(_extra_bits<<1));
+  return _zzi;
 }
 
 /*Expands all other zero run/value combo tokens.*/
-static void oc_token_expand_run(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  static const int NZEROS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
+static int oc_token_expand_run(int _token,int _extra_bits,
+ ogg_int16_t _dct_coeffs[128],int _zzi){
+  static const unsigned char NZEROS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
     6,10,1,2
   };
-  static const int NZEROS_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
+  static const unsigned char NZEROS_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
     3,7,0,1
   };
-  static const int VALUE_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
+  static const unsigned char VALUE_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
     0,0,0,1
   };
-  static const int VALUE_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
+  static const unsigned char VALUE_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
     0,0,1,1
   };
-  static const int VALUE_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
+  static const unsigned char VALUE_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
     1,1,2,2
   };
-  static const int SIGN_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
+  static const unsigned char SIGN_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
     2,3,1,2
   };
   int mask;
-  int zzi;
   int rl;
   _token-=OC_DCT_RUN_CAT1B;
   rl=(_extra_bits&NZEROS_MASK[_token])+NZEROS_ADJUST[_token];
-  zzi=*_zzi;
   /*LOOP VECTORIZES.*/
-  while(rl-->0)_dct_coeffs[zzi++]=0;
+  while(rl-->0)_dct_coeffs[_zzi++]=0;
   mask=-(_extra_bits>>SIGN_SHIFT[_token]);
-  _dct_coeffs[zzi++]=(ogg_int16_t)((VALUE_ADJUST[_token]+
+  _dct_coeffs[_zzi++]=(ogg_int16_t)((VALUE_ADJUST[_token]+
    (_extra_bits>>VALUE_SHIFT[_token]&VALUE_MASK[_token])+mask)^mask);
-  *_zzi=zzi;
+  return _zzi;
 }
 
 /*A jump table for expanding token values into coefficient values.
@@ -1159,12 +1179,11 @@
   _token:      The token value to expand.
   _extra_bits: The extra bits associated with the token.
   _dct_coeffs: The current list of coefficients, in zig-zag order.
-  _zzi:        A pointer to the zig-zag index of the next coefficient to write
-                to.
-               This is updated before the function returns.*/
-static void oc_dct_token_expand(int _token,int _extra_bits,
- ogg_int16_t *_dct_coeffs,int *_zzi){
-  (*OC_TOKEN_EXPAND_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
+  _zzi:        The zig-zag index of the next coefficient to write to.
+  Return: The updated index of the next coefficient to write to.*/
+static int oc_dct_token_expand(int _token,int _extra_bits,
+ ogg_int16_t _dct_coeffs[128],int _zzi){
+  return (*OC_TOKEN_EXPAND_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
    _extra_bits,_dct_coeffs,_zzi);
 }
 
@@ -1192,8 +1211,8 @@
     memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
   }
   else{
-    int           *coded_fragi;
-    int           *coded_fragi_end;
+    ptrdiff_t     *coded_fragi;
+    ptrdiff_t     *coded_fragi_end;
     unsigned char  qi0;
     /*Update the DC quantization index of each coded block.*/
     qi0=(unsigned char)_dec->state.qis[0];
@@ -1227,7 +1246,7 @@
       _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
       _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
       _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
-       (1-_dec->pp_frame_buf[0].height)*_dec->pp_frame_buf[0].stride;
+       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
     }
     else{
       size_t y_sz;
@@ -1271,20 +1290,20 @@
 
 
 typedef struct{
-  int  ti[3][64];
-  int  ebi[3][64];
-  int  eob_runs[3][64];
-  int  bounding_values[256];
-  int *coded_fragis[3];
-  int *uncoded_fragis[3];
-  int  fragy0[3];
-  int  fragy_end[3];
-  int  ncoded_fragis[3];
-  int  nuncoded_fragis[3];
-  int  pred_last[3][3];
-  int  mcu_nvfrags;
-  int  loop_filter;
-  int  pp_level;
+  int              bounding_values[256];
+  ptrdiff_t        ti[3][64];
+  ptrdiff_t        ebi[3][64];
+  ptrdiff_t        eob_runs[3][64];
+  const ptrdiff_t *coded_fragis[3];
+  const ptrdiff_t *uncoded_fragis[3];
+  ptrdiff_t        ncoded_fragis[3];
+  ptrdiff_t        nuncoded_fragis[3];
+  int              fragy0[3];
+  int              fragy_end[3];
+  int              pred_last[3][3];
+  int              mcu_nvfrags;
+  int              loop_filter;
+  int              pp_level;
 }oc_dec_pipeline_state;
 
 
@@ -1292,9 +1311,9 @@
 /*Initialize the main decoding pipeline.*/
 static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
  oc_dec_pipeline_state *_pipe){
-  int *coded_fragi_end;
-  int *uncoded_fragi_end;
-  int  pli;
+  const ptrdiff_t *coded_fragi_end;
+  const ptrdiff_t *uncoded_fragi_end;
+  int              pli;
   /*If chroma is sub-sampled in the vertical direction, we have to decode two
      super block rows of Y' for each super block row of Cb and Cr.*/
   _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
@@ -1342,27 +1361,28 @@
    the MCU is also computed.*/
 static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
  oc_dec_pipeline_state *_pipe,int _pli){
-  /*Undo the DC prediction.*/
-  oc_fragment_plane *fplane;
-  oc_fragment       *frag;
-  int               *pred_last;
-  int                ncoded_fragis;
-  int                fragx;
-  int                fragy;
-  int                fragy0;
-  int                fragy_end;
+  const oc_fragment_plane *fplane;
+  oc_fragment             *frag;
+  int                     *pred_last;
+  ptrdiff_t                ncoded_fragis;
+  int                      fragx;
+  int                      fragy;
+  int                      fragy0;
+  int                      fragy_end;
+  int                      nhfrags;
   /*Compute the first and last fragment row of the current MCU for this
      plane.*/
   fplane=_dec->state.fplanes+_pli;
   fragy0=_pipe->fragy0[_pli];
   fragy_end=_pipe->fragy_end[_pli];
-  frag=_dec->state.frags+fplane->froffset+(fragy0*fplane->nhfrags);
+  nhfrags=fplane->nhfrags;
+  frag=_dec->state.frags+fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
   ncoded_fragis=0;
   pred_last=_pipe->pred_last[_pli];
   for(fragy=fragy0;fragy<fragy_end;fragy++){
-    for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++){
+    for(fragx=0;fragx<nhfrags;fragx++,frag++){
       if(!frag->coded)continue;
-      pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc+=
+      pred_last[OC_FRAME_FOR_MODE[frag->mb_mode]]=frag->dc+=
        oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
       ncoded_fragis++;
     }
@@ -1370,7 +1390,7 @@
   _pipe->ncoded_fragis[_pli]=ncoded_fragis;
   /*Also save the number of uncoded fragments so we know how many to copy.*/
   _pipe->nuncoded_fragis[_pli]=
-   (fragy_end-fragy0)*fplane->nhfrags-ncoded_fragis;
+   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
 }
 
 /*Reconstructs all coded fragments in a single MCU (one or two super block
@@ -1384,30 +1404,28 @@
    counts.*/
 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
  oc_dec_pipeline_state *_pipe,int _pli){
-  /*Decode the AC coefficients.*/
-  int *ti;
-  int *ebi;
-  int *eob_runs;
-  int *coded_fragi;
-  int *coded_fragi_end;
+  const oc_fragment *frags;
+  const ptrdiff_t   *coded_fragi;
+  const ptrdiff_t   *coded_fragi_end;
+  ptrdiff_t         *ti;
+  ptrdiff_t         *ebi;
+  ptrdiff_t         *eob_runs;
+  frags=_dec->state.frags;
+  coded_fragi_end=coded_fragi=_pipe->coded_fragis[_pli];
+  coded_fragi_end+=_pipe->ncoded_fragis[_pli];
   ti=_pipe->ti[_pli];
   ebi=_pipe->ebi[_pli];
   eob_runs=_pipe->eob_runs[_pli];
-  coded_fragi_end=coded_fragi=_pipe->coded_fragis[_pli];
-  coded_fragi_end+=_pipe->ncoded_fragis[_pli];
   for(;coded_fragi<coded_fragi_end;coded_fragi++){
-    oc_fragment    *frag;
     oc_quant_table *quants;
-    /*This array is made one bigger than necessary so that an invalid zero
-       run cannot cause a buffer overflow.
-      The inverse zig-zag mapping sends all out of range indices to the last
-       entry of this array, where they are ignored.*/
+    /*This array is made twice as large as necessary so that an invalid zero
+       run cannot cause a buffer overflow.*/
     ogg_int16_t    dct_coeffs[128];
-    int            fragi;
+    ptrdiff_t      fragi;
     int            zzi;
     int            last_zzi;
     fragi=*coded_fragi;
-    frag=_dec->state.frags+fragi;
+    /*Decode the AC coefficients.*/
     for(zzi=0;zzi<64;){
       int token;
       int eb;
@@ -1425,18 +1443,19 @@
         if(token<OC_NDCT_EOB_TOKEN_MAX){
           eob_runs[zzi]=-oc_dct_token_skip(token,eb);
         }
-        else oc_dct_token_expand(token,eb,dct_coeffs,&zzi);
+        else zzi=oc_dct_token_expand(token,eb,dct_coeffs,zzi);
       }
     }
     /*TODO: zzi should be exactly 64 here.
       If it's not, we should report some kind of warning.*/
     zzi=OC_MINI(zzi,64);
-    dct_coeffs[0]=(ogg_int16_t)frag->dc;
-    quants=_dec->state.dequant_tables[frag->mbmode!=OC_MODE_INTRA][_pli];
+    dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
+    quants=
+     _dec->state.dequant_tables[frags[fragi].mb_mode!=OC_MODE_INTRA][_pli];
     /*last_zzi is always initialized.
       If your compiler thinks otherwise, it is dumb.*/
-    oc_state_frag_recon(&_dec->state,frag,_pli,dct_coeffs,last_zzi,zzi,
-     quants[_dec->state.qis[0]][0],quants[frag->qi]);
+    oc_state_frag_recon(&_dec->state,fragi,_pli,dct_coeffs,last_zzi,zzi,
+     quants[_dec->state.qis[0]][0],quants[frags[fragi].qi]);
   }
   _pipe->coded_fragis[_pli]=coded_fragi;
   /*Right now the reconstructed MCU has only the coded blocks in it.*/
@@ -1555,9 +1574,13 @@
   unsigned char       *dc_qi;
   unsigned char       *dst;
   const unsigned char *src;
+  ptrdiff_t            froffset;
+  int                  dst_ystride;
+  int                  src_ystride;
+  int                  nhfrags;
+  int                  width;
   int                  notstart;
   int                  notdone;
-  int                  froffset;
   int                  flimit;
   int                  qstep;
   int                  y_end;
@@ -1566,58 +1589,64 @@
   _dst+=_pli;
   _src+=_pli;
   fplane=_dec->state.fplanes+_pli;
-  froffset=fplane->froffset+_fragy0*fplane->nhfrags;
+  nhfrags=fplane->nhfrags;
+  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
   variance=_dec->variances+froffset;
   dc_qi=_dec->dc_qis+froffset;
   notstart=_fragy0>0;
   notdone=_fragy_end<fplane->nvfrags;
   /*We want to clear an extra row of variances, except at the end.*/
-  memset(variance+(fplane->nhfrags&-notstart),0,
-   (_fragy_end+notdone-_fragy0-notstart)*fplane->nhfrags*sizeof(variance[0]));
+  memset(variance+(nhfrags&-notstart),0,
+   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
   /*Except for the first time, we want to point to the middle of the row.*/
   y=(_fragy0<<3)+(notstart<<2);
-  dst=_dst->data+y*_dst->stride;
-  src=_src->data+y*_src->stride;
+  dst_ystride=_dst->stride;
+  src_ystride=_src->stride;
+  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
+  src=_src->data+y*(ptrdiff_t)src_ystride;
+  width=_dst->width;
   for(;y<4;y++){
-    memcpy(dst,src,_dst->width*sizeof(dst[0]));
-    dst+=_dst->stride;
-    src+=_src->stride;
+    memcpy(dst,src,width*sizeof(dst[0]));
+    dst+=dst_ystride;
+    src+=src_ystride;
   }
   /*We also want to skip the last row in the frame for this loop.*/
   y_end=_fragy_end-!notdone<<3;
   for(;y<y_end;y+=8){
     qstep=_dec->pp_dc_scale[*dc_qi];
     flimit=(qstep*3)>>2;
-    oc_filter_hedge(dst,_dst->stride,src-_src->stride,_src->stride,
-     qstep,flimit,variance,variance+fplane->nhfrags);
+    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
+     qstep,flimit,variance,variance+nhfrags);
     variance++;
     dc_qi++;
-    for(x=8;x<_dst->width;x+=8){
+    for(x=8;x<width;x+=8){
       qstep=_dec->pp_dc_scale[*dc_qi];
       flimit=(qstep*3)>>2;
-      oc_filter_hedge(dst+x,_dst->stride,src+x-_src->stride,_src->stride,
-       qstep,flimit,variance,variance+fplane->nhfrags);
-      oc_filter_vedge(dst+x-(_dst->stride<<2)-4,_dst->stride,
+      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
+       qstep,flimit,variance,variance+nhfrags);
+      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
        qstep,flimit,variance-1);
       variance++;
       dc_qi++;
     }
-    dst+=_dst->stride<<3;
-    src+=_src->stride<<3;
+    dst+=dst_ystride<<3;
+    src+=src_ystride<<3;
   }
   /*And finally, handle the last row in the frame, if it's in the range.*/
   if(!notdone){
-    for(;y<_dst->height;y++){
-      memcpy(dst,src,_dst->width*sizeof(dst[0]));
-      dst+=_dst->stride;
-      src+=_src->stride;
+    int height;
+    height=_dst->height;
+    for(;y<height;y++){
+      memcpy(dst,src,width*sizeof(dst[0]));
+      dst+=dst_ystride;
+      src+=src_ystride;
     }
     /*Filter the last row of vertical block edges.*/
     dc_qi++;
-    for(x=8;x<_dst->width;x+=8){
+    for(x=8;x<width;x+=8){
       qstep=_dec->pp_dc_scale[*dc_qi++];
       flimit=(qstep*3)>>2;
-      oc_filter_vedge(dst+x-(_dst->stride<<3)-4,_dst->stride,
+      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
        qstep,flimit,variance++);
     }
   }
@@ -1625,8 +1654,8 @@
 
 static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
  int _dc_scale,int _sharp_mod,int _strong){
-  static const int     MOD_MAX[2]={24,32};
-  static const int     MOD_SHIFT[2]={1,0};
+  static const unsigned char MOD_MAX[2]={24,32};
+  static const unsigned char MOD_SHIFT[2]={1,0};
   const unsigned char *psrc;
   const unsigned char *src;
   const unsigned char *nsrc;
@@ -1675,10 +1704,10 @@
     w=hmod[by];
     a-=w;
     b+=w**(src-!(_b&1));
-    w=vmod[(by<<3)];
+    w=vmod[by<<3];
     a-=w;
     b+=w*psrc[0];
-    w=vmod[(by+1<<3)];
+    w=vmod[by+1<<3];
     a-=w;
     b+=w*nsrc[0];
     w=hmod[(1<<3)+by];
@@ -1736,62 +1765,69 @@
   oc_fragment       *frag;
   int               *variance;
   unsigned char     *idata;
+  ptrdiff_t          froffset;
+  int                ystride;
+  int                nhfrags;
   int                sthresh;
   int                strong;
-  int                froffset;
   int                y_end;
+  int                width;
+  int                height;
   int                y;
   int                x;
   iplane=_img+_pli;
   fplane=_dec->state.fplanes+_pli;
-  froffset=fplane->froffset+_fragy0*fplane->nhfrags;
+  nhfrags=fplane->nhfrags;
+  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
   variance=_dec->variances+froffset;
   frag=_dec->state.frags+froffset;
   strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
   sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
   y=_fragy0<<3;
-  idata=iplane->data+y*iplane->stride;
+  ystride=iplane->stride;
+  idata=iplane->data+y*(ptrdiff_t)ystride;
   y_end=_fragy_end<<3;
+  width=iplane->width;
+  height=iplane->height;
   for(;y<y_end;y+=8){
-    for(x=0;x<iplane->width;x+=8){
+    for(x=0;x<width;x+=8){
       int b;
       int qi;
       int var;
       qi=frag->qi;
       var=*variance;
-      b=(x<=0)|(x+8>=iplane->width)<<1|(y<=0)<<2|(y+8>=iplane->height)<<3;
+      b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
       if(strong&&var>sthresh){
-        oc_dering_block(idata+x,iplane->stride,b,
+        oc_dering_block(idata+x,ystride,b,
          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
         if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
          !(b&2)&&variance[1]>OC_DERING_THRESH4||
-         !(b&4)&&*(variance-fplane->nvfrags)>OC_DERING_THRESH4||
-         !(b&8)&&variance[fplane->nvfrags]>OC_DERING_THRESH4){
-          oc_dering_block(idata+x,iplane->stride,b,
+         !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
+         !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
+          oc_dering_block(idata+x,ystride,b,
            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
-          oc_dering_block(idata+x,iplane->stride,b,
+          oc_dering_block(idata+x,ystride,b,
            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
         }
       }
       else if(var>OC_DERING_THRESH2){
-        oc_dering_block(idata+x,iplane->stride,b,
+        oc_dering_block(idata+x,ystride,b,
          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
       }
       else if(var>OC_DERING_THRESH1){
-        oc_dering_block(idata+x,iplane->stride,b,
+        oc_dering_block(idata+x,ystride,b,
          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
       }
       frag++;
       variance++;
     }
-    idata+=iplane->stride<<3;
+    idata+=ystride<<3;
   }
 }
 
 
 
-th_dec_ctx *th_decode_alloc(const th_info *_info,
- const th_setup_info *_setup){
+th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
   oc_dec_ctx *dec;
   if(_info==NULL||_setup==NULL)return NULL;
   dec=_ogg_malloc(sizeof(*dec));
@@ -1854,15 +1890,15 @@
   case TH_DECCTL_SET_TELEMETRY_MBMODE:{
     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
     if(_buf_sz!=sizeof(int))return TH_EINVAL;
-    _dec->telemetry = 1;
-    _dec->telemetry_mbmode = *(int *)_buf;
+    _dec->telemetry=1;
+    _dec->telemetry_mbmode=*(int *)_buf;
     return 0;
   }break;
   case TH_DECCTL_SET_TELEMETRY_MV:{
     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
     if(_buf_sz!=sizeof(int))return TH_EINVAL;
-    _dec->telemetry = 1;
-    _dec->telemetry_mv = *(int *)_buf;
+    _dec->telemetry=1;
+    _dec->telemetry_mv=*(int *)_buf;
     return 0;
   }break;
 #endif
@@ -1897,9 +1933,9 @@
       size_t       yplane_sz;
       size_t       cplane_sz;
       int          yhstride;
-      int          yvstride;
+      int          yheight;
       int          chstride;
-      int          cvstride;
+      int          cheight;
       /*We're decoding an INTER frame, but have no initialized reference
          buffers (i.e., decoding did not start on a key frame).
         We initialize them to a solid gray here.*/
@@ -1908,11 +1944,11 @@
       _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi=1;
       info=&_dec->state.info;
       yhstride=info->frame_width+2*OC_UMV_PADDING;
-      yvstride=info->frame_height+2*OC_UMV_PADDING;
+      yheight=info->frame_height+2*OC_UMV_PADDING;
       chstride=yhstride>>!(info->pixel_fmt&1);
-      cvstride=yvstride>>!(info->pixel_fmt&2);
-      yplane_sz=(size_t)yhstride*yvstride;
-      cplane_sz=(size_t)chstride*cvstride;
+      cheight=yheight>>!(info->pixel_fmt&2);
+      yplane_sz=yhstride*(size_t)yheight;
+      cplane_sz=chstride*(size_t)cheight;
       memset(_dec->state.ref_frame_data,0x80,yplane_sz+2*cplane_sz);
     }
     else{
@@ -2033,7 +2069,8 @@
       if(_dec->stripe_cb.stripe_decoded!=NULL){
         /*The callback might want to use the FPU, so let's make sure they can.
           We violate all kinds of ABI restrictions by not doing this until
-           now, but none of them actually matter.*/
+           now, but none of them actually matter since we don't use floating
+           point ourselves.*/
         oc_restore_fpu(&_dec->state);
         /*Make the callback, ensuring we flip the sense of the "start" and
            "end" of the available region upside down.*/
@@ -2057,11 +2094,13 @@
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
        _dec->state.ref_frame_idx[OC_FRAME_SELF];
     }
+    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
+       gamma values, if nothing else).*/
+    oc_restore_fpu(&_dec->state);
 #if defined(OC_DUMP_IMAGES)
     /*Don't dump images for dropped frames.*/
     oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
 #endif
-    oc_restore_fpu(&_dec->state);
     return 0;
   }
   else{

Modified: branches/theora-thusnelda/lib/dec/fragment.c
===================================================================
--- branches/theora-thusnelda/lib/dec/fragment.c	2009-05-01 15:22:09 UTC (rev 15976)
+++ branches/theora-thusnelda/lib/dec/fragment.c	2009-05-02 17:49:35 UTC (rev 15977)
@@ -101,7 +101,7 @@
   Return: The predicted DC value for this fragment.*/
 int oc_frag_pred_dc(const oc_fragment *_frag,
  const oc_fragment_plane *_fplane,int _x,int _y,int _pred_last[3]){
-  static const int PRED_SCALE[16][4]={
+  static const signed char   PRED_SCALE[16][4]={
     /*0*/
     {0,0,0,0},
     /*OC_PL*/
@@ -135,9 +135,11 @@
     /*OC_PL|OC_PUL|OC_PU|OC_PUR*/
     {29,-26,29,0}
   };
-  static const int PRED_SHIFT[16]={0,0,0,0,0,1,0,5,0,7,1,7,0,7,4,5};
-  static const int PRED_RMASK[16]={0,0,0,0,0,1,0,31,0,127,1,127,0,127,15,31};
-  static const int BC_MASK[8]={
+  static const unsigned char PRED_SHIFT[16]={0,0,0,0,0,1,0,5,0,7,1,7,0,7,4,5};
+  static const unsigned char PRED_RMASK[16]={
+    0,0,0,0,0,1,0,31,0,127,1,127,0,127,15,31
+  };
+  static const unsigned char BC_MASK[8]={
     /*No boundary condition.*/
     OC_PL|OC_PUL|OC_PU|OC_PUR,
     /*Left column.*/
@@ -161,8 +163,7 @@
   int                pred_frame;
   /*The boundary condition flags.*/
   int                bc;
-  /*DC predictor values: left, up-left, up, up-right, missing values
-     skipped.*/
+  /*DC predictor values: left, up-left, up, up-right, missing values skipped.*/
   int                p[4];
   /*Predictor count.*/
   int                np;
@@ -171,7 +172,7 @@
   /*The predicted DC value.*/
   int                ret;
   int                i;
-  pred_frame=OC_FRAME_FOR_MODE[_frag->mbmode];
+  pred_frame=OC_FRAME_FOR_MODE[_frag->mb_mode];
   bc=(_x==0)+((_y==0)<<1)+((_x+1==_fplane->nhfrags)<<2);
   predfr[0]=_frag-1;
   predfr[1]=_frag-_fplane->nhfrags-1;
@@ -183,7 +184,7 @@
     int pflag;
     pflag=1<<i;
     if((BC_MASK[bc]&pflag)&&predfr[i]->coded&&
-     OC_FRAME_FOR_MODE[predfr[i]->mbmode]==pred_frame){
+     OC_FRAME_FOR_MODE[predfr[i]->mb_mode]==pred_frame){
       p[np++]=predfr[i]->dc;
       pflags|=pflag;
     }

Modified: branches/theora-thusnelda/lib/dec/internal.c
===================================================================
--- branches/theora-thusnelda/lib/dec/internal.c	2009-05-01 15:22:09 UTC (rev 15976)
+++ branches/theora-thusnelda/lib/dec/internal.c	2009-05-02 17:49:35 UTC (rev 15977)
@@ -26,7 +26,7 @@
    block.
   All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs
    past the end of a block in bogus streams get mapped to a known location.*/
-const int OC_FZIG_ZAG[128]={
+const unsigned char OC_FZIG_ZAG[64]={
    0, 1, 8,16, 9, 2, 3,10,
   17,24,32,25,18,11, 4, 5,
   12,19,26,33,40,48,41,34,
@@ -34,20 +34,12 @@
   35,42,49,56,57,50,43,36,
   29,22,15,23,30,37,44,51,
   58,59,52,45,38,31,39,46,
-  53,60,61,54,47,55,62,63,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64
+  53,60,61,54,47,55,62,63
 };
 
 /*A map from the coefficient number in a block to its index in the zig zag
    scan.*/
-const int OC_IZIG_ZAG[64]={
+const unsigned char OC_IZIG_ZAG[64]={
    0, 1, 5, 6,14,15,27,28,
    2, 4, 7,13,16,26,29,42,
    3, 8,12,17,25,30,41,43,
@@ -59,7 +51,7 @@
 };
 
 /*The predictor frame to use for each macro block mode.*/
-const int OC_FRAME_FOR_MODE[8]={
+const unsigned char OC_FRAME_FOR_MODE[8]={
   /*OC_MODE_INTER_NOMV*/
   OC_FRAME_PREV,
   /*OC_MODE_INTRA*/
@@ -80,11 +72,11 @@
 
 /*A map from physical macro block ordering to bitstream macro block
    ordering within a super block.*/
-const int OC_MB_MAP[2][2]={{0,3},{1,2}};
+const unsigned char OC_MB_MAP[2][2]={{0,3},{1,2}};
 
 /*A list of the indices in the oc_mb.map array that can be valid for each of
    the various chroma decimation types.*/
-const int OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={
+const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={
   {0,1,2,3,4,8},
   {0,1,2,3,4,5,8,9},
   {0,1,2,3,4,6,8,10},
@@ -93,7 +85,7 @@
 
 /*The number of indices in the oc_mb.map array that can be valid for each of
    the various chroma decimation types.*/
-const int OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12};
+const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12};
 
 /*The number of extra bits that are coded with each of the DCT tokens.
   Each DCT token has some fixed number of additional bits (possibly 0) stored
@@ -125,42 +117,45 @@
            skipped in the current block.
           Otherwise, the negative of the return value indicates that number of
            blocks are to be ended.*/
-typedef int (*oc_token_skip_func)(int _token,int _extra_bits);
+typedef ptrdiff_t (*oc_token_skip_func)(int _token,int _extra_bits);
 
 /*Handles the simple end of block tokens.*/
-static int oc_token_skip_eob(int _token,int _extra_bits){
-  static const int NBLOCKS_ADJUST[OC_NDCT_EOB_TOKEN_MAX]={1,2,3,4,8,16,0};
+static ptrdiff_t oc_token_skip_eob(int _token,int _extra_bits){
+  static const unsigned char NBLOCKS_ADJUST[OC_NDCT_EOB_TOKEN_MAX]=
+   {1,2,3,4,8,16,0};
   return -_extra_bits-NBLOCKS_ADJUST[_token];
 }
 
 /*The last EOB token has a special case, where an EOB run of size zero ends all
    the remaining blocks in the frame.*/
-static int oc_token_skip_eob6(int _token,int _extra_bits){
-  if(!_extra_bits)return -INT_MAX;
+static ptrdiff_t oc_token_skip_eob6(int _token,int _extra_bits){
+  /*Note: We want to return -PTRDIFF_MAX, but that requires C99, which is not
+     yet available everywhere; this should be equivalent.*/
+  if(!_extra_bits)return -(~(size_t)0>>1);
   return -_extra_bits;
 }
 
 /*Handles the pure zero run tokens.*/
-static int oc_token_skip_zrl(int _token,int _extra_bits){
+static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){
   return _extra_bits+1;
 }
 
 /*Handles a normal coefficient value token.*/
-static int oc_token_skip_val(void){
+static ptrdiff_t oc_token_skip_val(void){
   return 1;
 }
 
 /*Handles a category 1A zero run/coefficient value combo token.*/
-static int oc_token_skip_run_cat1a(int _token){
+static ptrdiff_t oc_token_skip_run_cat1a(int _token){
   return _token-OC_DCT_RUN_CAT1A+2;
 }
 
 /*Handles category 1b and 2 zero run/coefficient value combo tokens.*/
-static int oc_token_skip_run(int _token,int _extra_bits){
-  static const int NCOEFFS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
+static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){
+  static const unsigned char NCOEFFS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
     7,11,2,3
   };
-  static const int NCOEFFS_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
+  static const unsigned char NCOEFFS_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
     3,7,0,1
   };
   _token-=OC_DCT_RUN_CAT1B;
@@ -216,14 +211,15 @@
            blocks are to be ended.
           0 will never be returned, so that at least one coefficient in one
            block will always be decoded for every token.*/
-int oc_dct_token_skip(int _token,int _extra_bits){
+ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){
   return (*OC_TOKEN_SKIP_TABLE[_token])(_token,_extra_bits);
 }
 
 
 /*The function used to fill in the chroma plane motion vectors for a macro
    block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with chroma decimated in the X and Y directions.
+  This version is for use with chroma decimated in the X and Y directions
+   (4:2:0).
   _cbmvs: The chroma block-level motion vectors to fill in.
   _lbmvs: The luma block-level motion vectors.*/
 static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
@@ -255,7 +251,7 @@
 
 /*The function used to fill in the chroma plane motion vectors for a macro
    block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with chroma decimated in the X direction.
+  This version is for use with chroma decimated in the X direction (4:2:2).
   _cbmvs: The chroma block-level motion vectors to fill in.
   _lbmvs: The luma block-level motion vectors.*/
 static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
@@ -273,7 +269,7 @@
 
 /*The function used to fill in the chroma plane motion vectors for a macro
    block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with no chroma decimation.
+  This version is for use with no chroma decimation (4:4:4).
   _cbmvs: The chroma block-level motion vectors to fill in.
   _lmbmv: The luma macro-block level motion vector to fill in for use in
            prediction.
@@ -354,7 +350,8 @@
     _dst[pli].width=_src[pli].width;
     _dst[pli].height=_src[pli].height;
     _dst[pli].stride=-_src[pli].stride;
-    _dst[pli].data=_src[pli].data+(1-_dst[pli].height)*_dst[pli].stride;
+    _dst[pli].data=_src[pli].data
+     +(1-_dst[pli].height)*(ptrdiff_t)_dst[pli].stride;
   }
 }
 
@@ -363,7 +360,7 @@
 }
 
 ogg_uint32_t th_version_number(void){
-  return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+(TH_VERSION_SUB);
+  return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+TH_VERSION_SUB;
 }
 
 /*Determines the packet type.

Modified: branches/theora-thusnelda/lib/dec/state.c
===================================================================
--- branches/theora-thusnelda/lib/dec/state.c	2009-05-01 15:22:09 UTC (rev 15976)
+++ branches/theora-thusnelda/lib/dec/state.c	2009-05-02 17:49:35 UTC (rev 15977)
@@ -31,28 +31,29 @@
 #endif
 
 /*Returns the fragment index of the top-left block in a macro block.
-  This can be used to test whether or not the whole macro block is coded.
-  _sb:    The super block.
-  _quadi: The quadrant number.
+  This can be used to test whether or not the whole macro block is valid.
+  _sb_map: The super block map.
+  _quadi:  The quadrant number.
   Return: The index of the fragment of the upper left block in the macro
    block, or -1 if the block lies outside the coded frame.*/
-static int oc_sb_quad_top_left_frag(const oc_sb *_sb,int _quadi){
+static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
   /*It so happens that under the Hilbert curve ordering described below, the
      upper-left block in each macro block is at index 0, except in macro block
      3, where it is at index 2.*/
-  return _sb->map[_quadi][_quadi&_quadi<<1];
+  return _sb_map[_quadi][_quadi&_quadi<<1];
 }
 
 /*Fills in the mapping from block positions to fragment numbers for a single
    color plane.
-  This function also fills in the "valid" flag of each quadrant in a super
-   block.
-  _sbs:    The array of super blocks for the color plane.
-  _frag0:  The index of the first fragment in the plane.
-  _hfrags: The number of horizontal fragments in a coded frame.
-  _vfrags: The number of vertical fragments in a coded frame.*/
-static void oc_sb_create_plane_mapping(oc_sb _sbs[],int _frag0,int _hfrags,
- int _vfrags){
+  This function also fills in the "valid" flag of each quadrant in the super
+   block flags.
+  _sb_maps:  The array of super block maps for the color plane.
+  _sb_flags: The array of super block flags for the color plane.
+  _frag0:    The index of the first fragment in the plane.
+  _hfrags:   The number of horizontal fragments in a coded frame.
+  _vfrags:   The number of vertical fragments in a coded frame.*/
+static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
+ oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
   /*Contains the (macro_block,block) indices for a 4x4 grid of
      fragments.
     The pattern is a 4x4 Hilbert space-filling curve.
@@ -67,10 +68,10 @@
     {{1,0},{1,3},{2,0},{2,3}},
     {{1,1},{1,2},{2,1},{2,2}}
   };
-  oc_sb *sb;
-  int    yfrag;
-  int    y;
-  sb=_sbs;
+  ptrdiff_t  yfrag;
+  unsigned   sbi;
+  int        y;
+  sbi=0;
   yfrag=_frag0;
   for(y=0;;y+=4){
     int imax;
@@ -80,30 +81,31 @@
     imax=_vfrags-y;
     if(imax>4)imax=4;
     else if(imax<=0)break;
-    for(x=0;;x+=4,sb++){
-      int    xfrag;
-      int    jmax;
-      int    quadi;
-      int    i;
+    for(x=0;;x+=4,sbi++){
+      ptrdiff_t xfrag;
+      int       jmax;
+      int       quadi;
+      int       i;
       /*Figure out how many rows of blocks in this super block lie within the
          image.*/
       jmax=_hfrags-x;
       if(jmax>4)jmax=4;
       else if(jmax<=0)break;
       /*By default, set all fragment indices to -1.*/
-      memset(sb->map[0],0xFF,sizeof(sb->map));
+      memset(_sb_maps[sbi][0],0xFF,sizeof(_sb_maps[sbi]));
       /*Fill in the fragment map for this super block.*/
       xfrag=yfrag+x;
       for(i=0;i<imax;i++){
         int j;
         for(j=0;j<jmax;j++){
-          sb->map[SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
+          _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
         }
         xfrag+=_hfrags;
       }
       /*Mark which quadrants of this super block lie within the image.*/
       for(quadi=0;quadi<4;quadi++){
-        sb->quad_valid|=(oc_sb_quad_top_left_frag(sb,quadi)>=0)<<quadi;
+        _sb_flags[sbi].quad_valid|=
+         (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi;
       }
     }
     yfrag+=_hfrags<<2;
@@ -112,102 +114,103 @@
 
 /*Fills in the Y plane fragment map for a macro block given the fragment
    coordinates of its upper-left hand corner.
-  _mb:     The macro block to fill.
+  _mb_map:    The macro block map to fill.
   _fplane: The description of the Y plane.
-  _x:      The X location of the upper-left hand fragment in the Y plane.
-  _y:      The Y location of the upper-left hand fragment in the Y plane.*/
-static void oc_mb_fill_ymapping(oc_mb *_mb,const oc_fragment_plane *_fplane,
- int _x,int _y){
+  _xfrag0: The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
   int i;
   for(i=0;i<2;i++){
     int j;
-    if(_y+i>=_fplane->nvfrags)break;
+    if(_yfrag0+i>=_fplane->nvfrags)break;
     for(j=0;j<2;j++){
-      if(_x+j>=_fplane->nhfrags)break;
-      _mb->map[0][i<<1|j]=(_y+i)*_fplane->nhfrags+_x+j;
+      if(_xfrag0+j>=_fplane->nhfrags)break;
+      _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j;
     }
   }
 }
 
 /*Fills in the chroma plane fragment maps for a macro block.
-  This version is for use with chroma decimated in the X and Y directions.
-  _mb:      The macro block to fill.
+  This version is for use with chroma decimated in the X and Y directions
+   (4:2:0).
+  _mb_map:  The macro block map to fill.
   _fplanes: The descriptions of the fragment planes.
-  _x:       The X location of the upper-left hand fragment in the Y plane.
-  _y:       The Y location of the upper-left hand fragment in the Y plane.*/
-static void oc_mb_fill_cmapping00(oc_mb *_mb,
- const oc_fragment_plane _fplanes[3],int _x,int _y){
-  int fragi;
-  _x>>=1;
-  _y>>=1;
-  fragi=_y*_fplanes[1].nhfrags+_x;
-  _mb->map[1][0]=fragi+_fplanes[1].froffset;
-  _mb->map[2][0]=fragi+_fplanes[2].froffset;
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t fragi;
+  _xfrag0>>=1;
+  _yfrag0>>=1;
+  fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
+  _mb_map[1][0]=fragi+_fplanes[1].froffset;
+  _mb_map[2][0]=fragi+_fplanes[2].froffset;
 }
 
 /*Fills in the chroma plane fragment maps for a macro block.
   This version is for use with chroma decimated in the Y direction.
-  _mb:      The macro block to fill.
+  _mb_map:  The macro block map to fill.
   _fplanes: The descriptions of the fragment planes.
-  _x:       The X location of the upper-left hand fragment in the Y plane.
-  _y:       The Y location of the upper-left hand fragment in the Y plane.*/
-static void oc_mb_fill_cmapping01(oc_mb *_mb,
- const oc_fragment_plane _fplanes[3],int _x,int _y){
-  int fragi;
-  int j;
-  _y>>=1;
-  fragi=_y*_fplanes[1].nhfrags+_x;
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t fragi;
+  int       j;
+  _yfrag0>>=1;
+  fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
   for(j=0;j<2;j++){
-    if(_x+j>=_fplanes[1].nhfrags)break;
-    _mb->map[1][j]=fragi+_fplanes[1].froffset;
-    _mb->map[2][j]=fragi+_fplanes[2].froffset;
+    if(_xfrag0+j>=_fplanes[1].nhfrags)break;
+    _mb_map[1][j]=fragi+_fplanes[1].froffset;
+    _mb_map[2][j]=fragi+_fplanes[2].froffset;
     fragi++;
   }
 }
 
 /*Fills in the chroma plane fragment maps for a macro block.
-  This version is for use with chroma decimated in the X direction.
-  _mb:      The macro block to fill.
+  This version is for use with chroma decimated in the X direction (4:2:2).
+  _mb_map:  The macro block map to fill.
   _fplanes: The descriptions of the fragment planes.
-  _x:       The X location of the upper-left hand fragment in the Y plane.
-  _y:       The Y location of the upper-left hand fragment in the Y plane.*/
-static void oc_mb_fill_cmapping10(oc_mb *_mb,
- const oc_fragment_plane _fplanes[3],int _x,int _y){
-  int fragi;
-  int i;
-  _x>>=1;
-  fragi=_y*_fplanes[1].nhfrags+_x;
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t fragi;
+  int       i;
+  _xfrag0>>=1;
+  fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
   for(i=0;i<2;i++){
-    if(_y+i>=_fplanes[1].nvfrags)break;
-    _mb->map[1][i<<1]=fragi+_fplanes[1].froffset;
-    _mb->map[2][i<<1]=fragi+_fplanes[2].froffset;
+    if(_yfrag0+i>=_fplanes[1].nvfrags)break;
+    _mb_map[1][i<<1]=fragi+_fplanes[1].froffset;
+    _mb_map[2][i<<1]=fragi+_fplanes[2].froffset;
     fragi+=_fplanes[1].nhfrags;
   }
 }
 
 /*Fills in the chroma plane fragment maps for a macro block.
-  This version is for use with no chroma decimation.
-  This uses the already filled-in Y plane values.
-  _mb:      The macro block to fill.
+  This version is for use with no chroma decimation (4:4:4).
+  This uses the already filled-in luma plane values.
+  _mb_map:  The macro block map to fill.
   _fplanes: The descriptions of the fragment planes.*/
-static void oc_mb_fill_cmapping11(oc_mb *_mb,
+static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
  const oc_fragment_plane _fplanes[3]){
   int k;
   for(k=0;k<4;k++){
-    if(_mb->map[0][k]>=0){
-      _mb->map[1][k]=_mb->map[0][k]+_fplanes[1].froffset;
-      _mb->map[2][k]=_mb->map[0][k]+_fplanes[2].froffset;
+    if(_mb_map[0][k]>=0){
+      _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
+      _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
     }
   }
 }
 
 /*The function type used to fill in the chroma plane fragment maps for a
    macro block.
-  _mb:      The macro block to fill.
+  _mb_map:  The macro block map to fill.
   _fplanes: The descriptions of the fragment planes.
-  _x:       The X location of the upper-left hand fragment in the Y plane.
-  _y:       The Y location of the upper-left hand fragment in the Y plane.*/
-typedef void (*oc_mb_fill_cmapping_func)(oc_mb *_mb,
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
  const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
 
 /*A table of functions used to fill in the chroma plane fragment maps for a
@@ -221,44 +224,43 @@
 
 /*Fills in the mapping from macro blocks to their corresponding fragment
    numbers in each plane.
-  _mbs:     The array of macro blocks.
-  _fplanes: The descriptions of the fragment planes.
-  _ctype:   The chroma decimation type.*/
-static void oc_mb_create_mapping(oc_mb _mbs[],
- const oc_fragment_plane _fplanes[3],int _ctype){
+  _mb_maps:   The list of macro block maps.
+  _mb_modes:  The list of macro block modes; macro blocks completely outside
+               the coded region are marked invalid.
+  _fplanes:   The descriptions of the fragment planes.
+  _pixel_fmt: The chroma decimation type.*/
+static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
+ signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
   oc_mb_fill_cmapping_func  mb_fill_cmapping;
-  oc_mb                    *mb0;
+  unsigned                  sbi;
   int                       y;
-  mb0=_mbs;
-  mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_ctype];
-  /*Loop through the Y plane super blocks.*/
-  for(y=0;y<_fplanes[0].nvfrags;y+=4){
+  mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
+  /*Loop through the luma plane super blocks.*/
+  for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){
     int x;
-    for(x=0;x<_fplanes[0].nhfrags;x+=4,mb0+=4){
+    for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){
       int ymb;
       /*Loop through the macro blocks in each super block in display order.*/
       for(ymb=0;ymb<2;ymb++){
         int xmb;
         for(xmb=0;xmb<2;xmb++){
-          oc_mb *mb;
-          int    mbx;
-          int    mby;
-          mb=mb0+OC_MB_MAP[ymb][xmb];
+          unsigned mbi;
+          int      mbx;
+          int      mby;
+          mbi=sbi<<2|OC_MB_MAP[ymb][xmb];
           mbx=x|xmb<<1;
           mby=y|ymb<<1;
-          mb->x=mbx<<3;
-          mb->y=mby<<3;
-          /*Initialize fragment indexes to -1.*/
-          memset(mb->map,0xFF,sizeof(mb->map));
+          /*Initialize fragment indices to -1.*/
+          memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
           /*Make sure this macro block is within the encoded region.*/
           if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){
-            mb->mode=OC_MODE_INVALID;
+            _mb_modes[mbi]=OC_MODE_INVALID;
             continue;
           }
-          /*Fill in the fragment indices for the Y plane.*/
-          oc_mb_fill_ymapping(mb,_fplanes,mbx,mby);
+          /*Fill in the fragment indices for the luma plane.*/
+          oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
           /*Fill in the fragment indices for the chroma planes.*/
-          (*mb_fill_cmapping)(mb,_fplanes,mbx,mby);
+          (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby);
         }
       }
     }
@@ -269,18 +271,14 @@
    region of the frame.
   _state: The Theora state containing the fragments to be marked.*/
 static void oc_state_border_init(oc_theora_state *_state){
-  typedef struct{
-    int x0;
-    int y0;
-    int xf;
-    int yf;
-  }oc_crop_rect;
   oc_fragment       *frag;
   oc_fragment       *yfrag_end;
   oc_fragment       *xfrag_end;
   oc_fragment_plane *fplane;
-  oc_crop_rect      *crop;
-  oc_crop_rect       crop_rects[3];
+  int                crop_x0;
+  int                crop_y0;
+  int                crop_xf;
+  int                crop_yf;
   int                pli;
   int                y;
   int                x;
@@ -294,20 +292,19 @@
   yfrag_end=frag=_state->frags;
   for(pli=0;pli<3;pli++){
     fplane=_state->fplanes+pli;
-    crop=crop_rects+pli;
     /*Set up the cropping rectangle for this plane.*/
-    crop->x0=_state->info.pic_x;
-    crop->xf=_state->info.pic_x+_state->info.pic_width;
-    crop->y0=_state->info.pic_y;
-    crop->yf=_state->info.pic_y+_state->info.pic_height;
+    crop_x0=_state->info.pic_x;
+    crop_xf=_state->info.pic_x+_state->info.pic_width;
+    crop_y0=_state->info.pic_y;
+    crop_yf=_state->info.pic_y+_state->info.pic_height;
     if(pli>0){
       if(!(_state->info.pixel_fmt&1)){
-        crop->x0=crop->x0>>1;
-        crop->xf=crop->xf+1>>1;
+        crop_x0=crop_x0>>1;
+        crop_xf=crop_xf+1>>1;
       }
       if(!(_state->info.pixel_fmt&2)){
-        crop->y0=crop->y0>>1;
-        crop->yf=crop->yf+1>>1;
+        crop_y0=crop_y0>>1;
+        crop_yf=crop_yf+1>>1;
       }
     }
     y=0;
@@ -320,13 +317,13 @@
           This guarantees that if we count a fragment as straddling the
            border below, at least one pixel in the fragment will be inside
            the displayable region.*/
-        if(x+8<=crop->x0||crop->xf<=x||y+8<=crop->y0||crop->yf<=y||
-         crop->x0>=crop->xf||crop->y0>=crop->yf){
+        if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
+         crop_x0>=crop_xf||crop_y0>=crop_yf){
           frag->invalid=1;
         }
         /*Otherwise, check to see if it straddles the border.*/
-        else if(x<crop->x0&&crop->x0<x+8||x<crop->xf&&crop->xf<x+8||
-         y<crop->y0&&crop->y0<y+8||y<crop->yf&&crop->yf<y+8){
+        else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
+         y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
           ogg_int64_t mask;
           int         npixels;
           int         i;
@@ -334,7 +331,7 @@
           for(i=0;i<8;i++){
             int j;
             for(j=0;j<8;j++){
-              if(x+j>=crop->x0&&x+j<crop->xf&&y+i>=crop->y0&&y+i<crop->yf){
+              if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
                 mask|=(ogg_int64_t)1<<(i<<3|j);
                 npixels++;
               }
@@ -342,7 +339,7 @@
           }
           /*Search the fragment array for border info with the same pattern.
             In general, there will be at most 8 different patterns (per
-            plane).*/
+             plane).*/
           for(i=0;;i++){
             if(i>=_state->nborders){
               _state->nborders++;
@@ -350,34 +347,35 @@
               _state->borders[i].npixels=npixels;
             }
             else if(_state->borders[i].mask!=mask)continue;
-            frag->border=_state->borders+i;
+            frag->borderi=i;
             break;
           }
         }
+        else frag->borderi=-1;
       }
     }
   }
 }
 
-static void oc_state_frarray_init(oc_theora_state *_state){
-  int yhfrags;
-  int yvfrags;
-  int chfrags;
-  int cvfrags;
-  int yfrags;
-  int cfrags;
-  int nfrags;
-  int yhsbs;
-  int yvsbs;
-  int chsbs;
-  int cvsbs;
-  int ysbs;
-  int csbs;
-  int nsbs;
-  int nmbs;
-  int hdec;
-  int vdec;
-  int pli;
+static int oc_state_frarray_init(oc_theora_state *_state){
+  int       yhfrags;
+  int       yvfrags;
+  int       chfrags;
+  int       cvfrags;
+  ptrdiff_t yfrags;
+  ptrdiff_t cfrags;
+  ptrdiff_t nfrags;
+  unsigned  yhsbs;
+  unsigned  yvsbs;
+  unsigned  chsbs;
+  unsigned  cvsbs;
+  unsigned  ysbs;
+  unsigned  csbs;
+  unsigned  nsbs;
+  size_t    nmbs;
+  int       hdec;
+  int       vdec;
+  int       pli;
   /*Figure out the number of fragments in each plane.*/
   /*These parameters have already been validated to be multiples of 16.*/
   yhfrags=_state->info.frame_width>>3;
@@ -386,8 +384,8 @@
   vdec=!(_state->info.pixel_fmt&2);
   chfrags=yhfrags+hdec>>hdec;
   cvfrags=yvfrags+vdec>>vdec;
-  yfrags=yhfrags*yvfrags;
-  cfrags=chfrags*cvfrags;
+  yfrags=yhfrags*(ptrdiff_t)yvfrags;
+  cfrags=chfrags*(ptrdiff_t)cvfrags;
   nfrags=yfrags+2*cfrags;
   /*Figure out the number of super blocks in each plane.*/
   yhsbs=yhfrags+3>>2;
@@ -397,7 +395,20 @@
   ysbs=yhsbs*yvsbs;
   csbs=chsbs*cvsbs;
   nsbs=ysbs+2*csbs;
-  nmbs=ysbs<<2;
+  nmbs=(size_t)ysbs<<2;
+  /*Check for overflow.
+    We support the ridiculous upper limits of the specification (1048560 by
+     1048560, or 3 TB frames) if the target architecture has 64-bit pointers,
+     but for those with 32-bit pointers (or smaller!) we have to check.
+    If the caller wants to prevent denial-of-service by imposing a more
+     reasonable upper limit on the size of attempted allocations, they must do
+     so themselves; we have no platform independent way to determine how much
+     system memory there is nor an application-independent way to decide what a
+     "reasonable" allocation is.*/
+  if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags||
+   ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){
+    return TH_EIMPL;
+  }
   /*Initialize the fragment array.*/
   _state->fplanes[0].nhfrags=yhfrags;
   _state->fplanes[0].nvfrags=yvfrags;
@@ -418,34 +429,43 @@
   _state->fplanes[2].sboffset=ysbs+csbs;
   _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs;
   _state->nfrags=nfrags;
-  _state->frags=_ogg_calloc(nfrags,sizeof(oc_fragment));
+  _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags));
+  _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs));
   _state->nsbs=nsbs;
-  _state->sbs=_ogg_calloc(nsbs,sizeof(oc_sb));
+  _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps));
+  _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags));
   _state->nhmbs=yhsbs<<1;
   _state->nvmbs=yvsbs<<1;
   _state->nmbs=nmbs;
-  _state->mbs=_ogg_calloc(nmbs,sizeof(oc_mb));
-  _state->coded_fragis=_ogg_malloc(nfrags*sizeof(_state->coded_fragis[0]));
+  _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
+  _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
+  _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
   _state->uncoded_fragis=_state->coded_fragis+nfrags;
-  _state->coded_mbis=_ogg_malloc(nmbs*sizeof(_state->coded_mbis[0]));
+  _state->coded_mbis=_ogg_malloc(nmbs*sizeof(*_state->coded_mbis));
   /*Create the mapping from super blocks to fragments.*/
   for(pli=0;pli<3;pli++){
     oc_fragment_plane *fplane;
     fplane=_state->fplanes+pli;
-    oc_sb_create_plane_mapping(_state->sbs+fplane->sboffset,
-     fplane->froffset,fplane->nhfrags,fplane->nvfrags);
+    oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset,
+     _state->sb_flags+fplane->sboffset,fplane->froffset,
+     fplane->nhfrags,fplane->nvfrags);
   }
   /*Create the mapping from macro blocks to fragments.*/
-  oc_mb_create_mapping(_state->mbs,_state->fplanes,_state->info.pixel_fmt);
-  /*Initialize the invalid and border fields of each fragment.*/
+  oc_mb_create_mapping(_state->mb_maps,_state->mb_modes,
+   _state->fplanes,_state->info.pixel_fmt);
+  /*Initialize the invalid and borderi fields of each fragment.*/
   oc_state_border_init(_state);
+  return 0;
 }
 
 static void oc_state_frarray_clear(oc_theora_state *_state){
   _ogg_free(_state->coded_mbis);
   _ogg_free(_state->coded_fragis);
-  _ogg_free(_state->mbs);
-  _ogg_free(_state->sbs);
+  _ogg_free(_state->mb_modes);
+  _ogg_free(_state->mb_maps);
+  _ogg_free(_state->sb_flags);
+  _ogg_free(_state->sb_maps);
+  _ogg_free(_state->frag_mvs);
   _ogg_free(_state->frags);
 }
 
@@ -456,38 +476,54 @@
   If chroma is decimated in either direction, the padding is reduced by a
    factor of 2 on the appropriate sides.
   _enc: The encoding context to store the buffers in.*/
-static void oc_state_ref_bufs_init(oc_theora_state *_state){
-  th_info   *info;
+static int oc_state_ref_bufs_init(oc_theora_state *_state){
+  th_info       *info;
   unsigned char *ref_frame_data;
+  size_t         ref_frame_data_sz;
+  size_t         ref_frame_sz;
   size_t         yplane_sz;
   size_t         cplane_sz;
   int            yhstride;
-  int            yvstride;
+  int            yheight;
   int            chstride;
-  int            cvstride;
-  int            yoffset;
-  int            coffset;
+  int            cheight;
+  ptrdiff_t      yoffset;
+  ptrdiff_t      coffset;
+  ptrdiff_t     *frag_buf_offs;
+  ptrdiff_t      fragi;
+  int            hdec;
+  int            vdec;
   int            rfi;
+  int            pli;
   info=&_state->info;
   /*Compute the image buffer parameters for each plane.*/
+  hdec=!(info->pixel_fmt&1);
+  vdec=!(info->pixel_fmt&2);
   yhstride=info->frame_width+2*OC_UMV_PADDING;
-  yvstride=info->frame_height+2*OC_UMV_PADDING;
-  chstride=yhstride>>!(info->pixel_fmt&1);
-  cvstride=yvstride>>!(info->pixel_fmt&2);
-  yplane_sz=(size_t)yhstride*yvstride;
-  cplane_sz=(size_t)chstride*cvstride;
-  yoffset=OC_UMV_PADDING+OC_UMV_PADDING*yhstride;
-  coffset=(OC_UMV_PADDING>>!(info->pixel_fmt&1))+
-   (OC_UMV_PADDING>>!(info->pixel_fmt&2))*chstride;
-  _state->ref_frame_data=ref_frame_data=_ogg_malloc(3*(yplane_sz+2*cplane_sz));
+  yheight=info->frame_height+2*OC_UMV_PADDING;
+  chstride=yhstride>>hdec;
+  cheight=yheight>>vdec;
+  yplane_sz=yhstride*(size_t)yheight;
+  cplane_sz=chstride*(size_t)cheight;
+  yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
+  coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
+  ref_frame_sz=yplane_sz+2*cplane_sz;
+  ref_frame_data_sz=3*ref_frame_sz;
+  /*Check for overflow.
+    The same caveats apply as for oc_state_frarray_init().*/
+  if(yplane_sz/yhstride!=yheight||2*cplane_sz<cplane_sz||
+   ref_frame_sz<yplane_sz||ref_frame_data_sz/3!=ref_frame_sz){
+    return TH_EIMPL;
+  }
+  ref_frame_data=_ogg_malloc(ref_frame_data_sz);
   /*Set up the width, height and stride for the image buffers.*/
   _state->ref_frame_bufs[0][0].width=info->frame_width;
   _state->ref_frame_bufs[0][0].height=info->frame_height;
   _state->ref_frame_bufs[0][0].stride=yhstride;
   _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
-   info->frame_width>>!(info->pixel_fmt&1);
+   info->frame_width>>hdec;
   _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
-   info->frame_height>>!(info->pixel_fmt&2);
+   info->frame_height>>vdec;
   _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
    chstride;
   memcpy(_state->ref_frame_bufs[1],_state->ref_frame_bufs[0],
@@ -496,26 +532,58 @@
    sizeof(_state->ref_frame_bufs[0]));
   /*Set up the data pointers for the image buffers.*/
   for(rfi=0;rfi<3;rfi++){
+    _state->ref_frame_data[rfi]=ref_frame_data;
     _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
     ref_frame_data+=yplane_sz;
     _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
     ref_frame_data+=cplane_sz;
     _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
     ref_frame_data+=cplane_sz;
-    /*Flip the buffer upside down.*/
+    /*Flip the buffer upside down.
+      This allows us to decode Theora's bottom-up frames in their natural
+       order, yet return a top-down buffer with a positive stride to the user.*/
     oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
      _state->ref_frame_bufs[rfi]);
-    /*Initialize the fragment pointers into this buffer.*/
-    oc_state_fill_buffer_ptrs(_state,rfi,_state->ref_frame_bufs[rfi]);
   }
-  /*Initialize the reference frame indexes.*/
+  /*Initialize the fragment buffer offsets.*/
+  ref_frame_data=_state->ref_frame_data[0];
+  frag_buf_offs=_state->frag_buf_offs=
+   _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
+  fragi=0;
+  for(pli=0;pli<3;pli++){
+    th_img_plane      *iplane;
+    oc_fragment_plane *fplane;
+    unsigned char     *vpix;
+    ptrdiff_t          stride;
+    ptrdiff_t          vfragi_end;
+    int                nhfrags;
+    iplane=_state->ref_frame_bufs[0]+pli;
+    fplane=_state->fplanes+pli;
+    vpix=iplane->data;
+    vfragi_end=fplane->froffset+fplane->nfrags;
+    nhfrags=fplane->nhfrags;
+    stride=iplane->stride;
+    while(fragi<vfragi_end){
+      ptrdiff_t      hfragi_end;
+      unsigned char *hpix;
+      hpix=vpix;
+      for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
+        frag_buf_offs[fragi]=hpix-ref_frame_data;
+        hpix+=8;
+      }
+      vpix+=stride<<3;
+    }
+  }
+  /*Initialize the reference frame indices.*/
   _state->ref_frame_idx[OC_FRAME_GOLD]=
    _state->ref_frame_idx[OC_FRAME_PREV]=
    _state->ref_frame_idx[OC_FRAME_SELF]=-1;
+  return 0;
 }
 
 static void oc_state_ref_bufs_clear(oc_theora_state *_state){
-  _ogg_free(_state->ref_frame_data);
+  _ogg_free(_state->frag_buf_offs);
+  _ogg_free(_state->ref_frame_data[0]);
 }
 
 
@@ -544,6 +612,7 @@
 
 int oc_state_init(oc_theora_state *_state,const th_info *_info){
   int old_granpos;
+  int ret;
   /*First validate the parameters.*/
   if(_info==NULL)return TH_EFAULT;
   /*The width and height of the encoded frame must be multiples of 16.
@@ -572,8 +641,10 @@
   _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
   _state->frame_type=OC_UNKWN_FRAME;
   oc_state_vtable_init(_state);
-  oc_state_frarray_init(_state);
-  oc_state_ref_bufs_init(_state);
+  ret=oc_state_frarray_init(_state);
+  if(ret<0)return ret;
+  ret=oc_state_ref_bufs_init(_state);
+  if(ret<0)return ret;
   /*If the keyframe_granule_shift is out of range, use the maximum allowable
      value.*/
   if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){
@@ -607,22 +678,24 @@
   _yend: The Y coordinate of the row to stop padding at.*/
 void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
  int _y0,int _yend){
-  th_img_plane *iplane;
-  unsigned char    *apix;
-  unsigned char    *bpix;
-  unsigned char    *epix;
-  int               hpadding;
+  th_img_plane  *iplane;
+  unsigned char *apix;
+  unsigned char *bpix;
+  unsigned char *epix;
+  int            stride;
+  int            hpadding;
   hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
   iplane=_state->ref_frame_bufs[_refi]+_pli;
-  apix=iplane->data+_y0*iplane->stride;
+  stride=iplane->stride;
+  apix=iplane->data+_y0*(ptrdiff_t)stride;
   bpix=apix+iplane->width-1;
-  epix=iplane->data+_yend*iplane->stride;
-  /*Note the use of != instead of <, which allows ystride to be negative.*/
+  epix=iplane->data+_yend*(ptrdiff_t)stride;
+  /*Note the use of != instead of <, which allows the stride to be negative.*/
   while(apix!=epix){
     memset(apix-hpadding,apix[0],hpadding);
     memset(bpix+1,bpix[0],hpadding);
-    apix+=iplane->stride;
-    bpix+=iplane->stride;
+    apix+=stride;
+    bpix+=stride;
   }
 }
 
@@ -633,25 +706,27 @@
   _refi:      The index of the reference buffer to pad.
   _pli:       The color plane.*/
 void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
-  th_img_plane *iplane;
-  unsigned char    *apix;
-  unsigned char    *bpix;
-  unsigned char    *epix;
-  int               hpadding;
-  int               vpadding;
-  int               fullw;
+  th_img_plane  *iplane;
+  unsigned char *apix;
+  unsigned char *bpix;
+  unsigned char *epix;
+  int            stride;
+  int            hpadding;
+  int            vpadding;
+  int            fullw;
   hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
   vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
   iplane=_state->ref_frame_bufs[_refi]+_pli;
+  stride=iplane->stride;
   fullw=iplane->width+(hpadding<<1);
   apix=iplane->data-hpadding;
-  bpix=iplane->data+(iplane->height-1)*iplane->stride-hpadding;
-  epix=apix-iplane->stride*vpadding;
+  bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
+  epix=apix-stride*(ptrdiff_t)vpadding;
   while(apix!=epix){
-    memcpy(apix-iplane->stride,apix,fullw);
-    memcpy(bpix+iplane->stride,bpix,fullw);
-    apix-=iplane->stride;
-    bpix+=iplane->stride;
+    memcpy(apix-stride,apix,fullw);
+    memcpy(bpix+stride,bpix,fullw);
+    apix-=stride;
+    bpix+=stride;
   }
 }
 
@@ -668,47 +743,6 @@
   }
 }
 
-/*Sets the buffer pointer in each fragment to point to the portion of the
-   image buffer which it corresponds to.
-  _state:   The Theora state to fill.
-  _buf_idx: The index of the buffer pointer to fill.
-            The first three correspond to our reconstructed frame buffers,
-             while the last corresponds to the input image.
-  _img:     The image buffer to fill the fragments with.*/
-void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
- th_ycbcr_buffer _img){
-  int pli;
-  /*Special handling for the input image to give us the opportunity to skip
-     some updates.
-    The other buffers do not change throughout the encoding process.*/
-  if(_buf_idx==OC_FRAME_IO){
-     if(memcmp(_state->input,_img,sizeof(th_ycbcr_buffer))==0)return;
-     memcpy(_state->input,_img,sizeof(th_ycbcr_buffer));
-  }
-  for(pli=0;pli<3;pli++){
-    th_img_plane  *iplane;
-    oc_fragment_plane *fplane;
-    oc_fragment       *frag;
-    oc_fragment       *vfrag_end;
-    unsigned char     *vpix;
-    iplane=&_img[pli];
-    fplane=&_state->fplanes[pli];
-    vpix=iplane->data;
-    frag=_state->frags+fplane->froffset;
-    vfrag_end=frag+fplane->nfrags;
-    while(frag<vfrag_end){
-      oc_fragment   *hfrag_end;
-      unsigned char *hpix;
-      hpix=vpix;
-      for(hfrag_end=frag+fplane->nhfrags;frag<hfrag_end;frag++){
-        frag->buffer[_buf_idx]=hpix;
-        hpix+=8;
-      }
-      vpix+=iplane->stride<<3;
-    }
-  }
-}
-
 /*Returns the macro block index of the macro block in the given position.
   _state: The Theora state the macro block is contained in.
   _mbx:   The X coordinate of the macro block (in macro blocks, not pixels).
@@ -754,8 +788,8 @@
   int offs;
   /*These two variables decide whether we are in half- or quarter-pixel
      precision in each component.*/
-  xprec=1+(!(_state->info.pixel_fmt&1)&&_pli);
-  yprec=1+(!(_state->info.pixel_fmt&2)&&_pli);
+  xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
+  yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
   /*These two variables are either 0 if all the fractional bits are zero or -1
      if any of them are non-zero.*/
   xfrac=OC_SIGNMASK(-(_dx&(xprec|1)));
@@ -813,10 +847,10 @@
   int mx2;
   int my2;
   int offs;
-  qpy=!(_state->info.pixel_fmt&2)&&_pli;
+  qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
   my=OC_MVMAP[qpy][_dy+31];
   my2=OC_MVMAP2[qpy][_dy+31];
-  qpx=!(_state->info.pixel_fmt&1)&&_pli;
+  qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
   mx=OC_MVMAP[qpx][_dx+31];
   mx2=OC_MVMAP2[qpx][_dx+31];
   offs=my*_ystride+mx;
@@ -830,45 +864,43 @@
 #endif
 }
 
-void oc_state_frag_recon(const oc_theora_state *_state,oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_quant,const ogg_uint16_t _ac_quant[64]){
-  _state->opt_vtable.state_frag_recon(_state,_frag,_pli,_dct_coeffs,
+  _state->opt_vtable.state_frag_recon(_state,_fragi,_pli,_dct_coeffs,
    _last_zzi,_ncoefs,_dc_quant,_ac_quant);
 }
 
-void oc_state_frag_recon_c(const oc_theora_state *_state,oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_quant, const ogg_uint16_t _ac_quant[64]){
-  ogg_int16_t res_buf[64];
-  int dst_framei;
-  int ystride;
+  ogg_int16_t    res_buf[64];
+  unsigned char *dst;
+  ptrdiff_t      frag_buf_off;
+  int            ystride;
+  int            mb_mode;
   /*Dequantize and apply the inverse transform.*/
   oc_dequant_idct8x8(_state,res_buf,_dct_coeffs,
    _last_zzi,_ncoefs,_dc_quant,_ac_quant);
   /*Fill in the target buffer.*/
-  dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
-  ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
-  /*For now ystride values in all ref frames assumed to be equal.*/
-  if(_frag->mbmode==OC_MODE_INTRA){
-    oc_frag_recon_intra(_state,_frag->buffer[dst_framei],ystride,res_buf);
-  }
+  frag_buf_off=_state->frag_buf_offs[_fragi];
+  mb_mode=_state->frags[_fragi].mb_mode;
+  ystride=_state->ref_frame_bufs[0][_pli].stride;
+  dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+  if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra(_state,dst,ystride,res_buf);
   else{
-    int ref_framei;
-    int mvoffsets[2];
-    ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
+    const unsigned char *ref;
+    int                  mvoffsets[2];
+    ref=
+     _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE[mb_mode]]]
+     +frag_buf_off;
     if(oc_state_get_mv_offsets(_state,mvoffsets,
-     _frag->mv[0],_frag->mv[1],ystride,_pli)>1){
-      oc_frag_recon_inter2(_state,_frag->buffer[dst_framei],
-       _frag->buffer[ref_framei]+mvoffsets[0],
-       _frag->buffer[ref_framei]+mvoffsets[1],ystride,res_buf);
+     _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1],ystride,_pli)>1){
+      oc_frag_recon_inter2(_state,
+       dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,res_buf);
     }
-    else{
-      oc_frag_recon_inter(_state,_frag->buffer[dst_framei],
-       _frag->buffer[ref_framei]+mvoffsets[0],ystride,res_buf);
-    }
+    else oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,res_buf);
   }
-  oc_restore_fpu(_state);
 }
 
 /*Copies the fragments specified by the lists of fragment indices from one
@@ -878,28 +910,30 @@
   _dst_frame: The reference frame to copy to.
   _src_frame: The reference frame to copy from.
   _pli:       The color plane the fragments lie in.*/
-void oc_state_frag_copy_list(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli){
+void oc_state_frag_copy_list(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli){
   _state->opt_vtable.state_frag_copy_list(_state,_fragis,_nfragis,_dst_frame,
    _src_frame,_pli);
 }
 
 void oc_state_frag_copy_list_c(const oc_theora_state *_state,
- const int *_fragis,int _nfragis,int _dst_frame,int _src_frame,int _pli){
-  const int *fragi;
-  const int *fragi_end;
-  int        dst_framei;
-  int        src_framei;
-  int        ystride;
-  dst_framei=_state->ref_frame_idx[_dst_frame];
-  src_framei=_state->ref_frame_idx[_src_frame];
-  ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
-  fragi_end=_fragis+_nfragis;
-  for(fragi=_fragis;fragi<fragi_end;fragi++){
-    oc_fragment   *frag;
-    frag=_state->frags+*fragi;
-    oc_frag_copy(_state,frag->buffer[dst_framei],
-     frag->buffer[src_framei],ystride);
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli){
+  const ptrdiff_t     *frag_buf_offs;
+  const unsigned char *src_frame_data;
+  unsigned char       *dst_frame_data;
+  ptrdiff_t            fragii;
+  int                  ystride;
+  dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]];
+  src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]];
+  ystride=_state->ref_frame_bufs[0][_pli].stride;
+  frag_buf_offs=_state->frag_buf_offs;
+  for(fragii=0;fragii<_nfragis;fragii++){
+    ptrdiff_t frag_buf_off;
+    frag_buf_off=frag_buf_offs[_fragis[fragii]];
+    oc_frag_copy(_state,dst_frame_data+frag_buf_off,
+     src_frame_data+frag_buf_off,ystride);
   }
 }
 
@@ -920,25 +954,24 @@
 }
 
 static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){
-  int y;
+  int x;
   _pix-=_ystride*2;
-  for(y=0;y<8;y++){
+  for(x=0;x<8;x++){
     int f;
-    f=_pix[0]-_pix[_ystride*3]+3*(_pix[_ystride*2]-_pix[_ystride]);
+    f=_pix[x]-_pix[_ystride*3+x]+3*(_pix[_ystride*2+x]-_pix[_ystride+x]);
     /*The _bv array is used to compute the function
       f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
       where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
     f=*(_bv+(f+4>>3));
-    _pix[_ystride]=OC_CLAMP255(_pix[_ystride]+f);
-    _pix[_ystride*2]=OC_CLAMP255(_pix[_ystride*2]-f);
-    _pix++;
+    _pix[_ystride+x]=OC_CLAMP255(_pix[_ystride+x]+f);
+    _pix[_ystride*2+x]=OC_CLAMP255(_pix[_ystride*2+x]-f);
   }
 }
 
 /*Initialize the bounding values array used by the loop filter.
   _bv: Storage for the array.
   Return: 0 on success, or a non-zero value if no filtering need be applied.*/
-int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv){
+int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){
   int flimit;
   int i;
   flimit=_state->loop_filter_limits[_state->qis[0]];
@@ -961,7 +994,7 @@
   _pli:       The color plane to filter.
   _fragy0:    The Y coordinate of the first fragment row to filter.
   _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int *_bv,
+void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256],
  int _refi,int _pli,int _fragy0,int _fragy_end){
   _state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli,
    _fragy0,_fragy_end);
@@ -971,46 +1004,53 @@
  int _refi,int _pli,int _fragy0,int _fragy_end){
   const th_img_plane      *iplane;
   const oc_fragment_plane *fplane;
-  oc_fragment             *frag_top;
-  oc_fragment             *frag0;
-  oc_fragment             *frag;
-  oc_fragment             *frag_end;
-  oc_fragment             *frag0_end;
-  oc_fragment             *frag_bot;
+  const oc_fragment       *frags;
+  const ptrdiff_t         *frag_buf_offs;
+  unsigned char           *ref_frame_data;
+  ptrdiff_t                fragi_top;
+  ptrdiff_t                fragi_bot;
+  ptrdiff_t                fragi0;
+  ptrdiff_t                fragi0_end;
+  int                      ystride;
+  int                      nhfrags;
   _bv+=127;
   iplane=_state->ref_frame_bufs[_refi]+_pli;
   fplane=_state->fplanes+_pli;
+  nhfrags=fplane->nhfrags;
   /*The following loops are constructed somewhat non-intuitively on purpose.
     The main idea is: if a block boundary has at least one coded fragment on
      it, the filter is applied to it.
     However, the order that the filters are applied in matters, and VP3 chose
      the somewhat strange ordering used below.*/
-  frag_top=_state->frags+fplane->froffset;
-  frag0=frag_top+_fragy0*fplane->nhfrags;
-  frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags;
-  frag_bot=_state->frags+fplane->froffset+fplane->nfrags;
-  while(frag0<frag0_end){
-    frag=frag0;
-    frag_end=frag+fplane->nhfrags;
-    while(frag<frag_end){
-      if(frag->coded){
-        if(frag>frag0){
-          loop_filter_h(frag->buffer[_refi],iplane->stride,_bv);
+  fragi_top=fplane->froffset;
+  fragi_bot=fragi_top+fplane->nfrags;
+  fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
+  fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
+  ystride=iplane->stride;
+  frags=_state->frags;
+  frag_buf_offs=_state->frag_buf_offs;
+  ref_frame_data=_state->ref_frame_data[_refi];
+  while(fragi0<fragi0_end){
+    ptrdiff_t fragi;
+    ptrdiff_t fragi_end;
+    fragi=fragi0;
+    fragi_end=fragi+nhfrags;
+    while(fragi<fragi_end){
+      if(frags[fragi].coded){
+        unsigned char *ref;
+        ref=ref_frame_data+frag_buf_offs[fragi];
+        if(fragi>fragi0)loop_filter_h(ref,ystride,_bv);
+        if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv);
+        if(fragi+1<fragi_end&&!frags[fragi+1].coded){
+          loop_filter_h(ref+8,ystride,_bv);
         }
-        if(frag0>frag_top){
-          loop_filter_v(frag->buffer[_refi],iplane->stride,_bv);
+        if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
+          loop_filter_v(ref+(ystride<<3),ystride,_bv);
         }
-        if(frag+1<frag_end&&!(frag+1)->coded){
-          loop_filter_h(frag->buffer[_refi]+8,iplane->stride,_bv);
-        }
-        if(frag+fplane->nhfrags<frag_bot&&!(frag+fplane->nhfrags)->coded){
-          loop_filter_v((frag+fplane->nhfrags)->buffer[_refi],
-           iplane->stride,_bv);
-        }
       }
-      frag++;
+      fragi++;
     }
-    frag0+=fplane->nhfrags;
+    fragi0+=nhfrags;
   }
 }
 
@@ -1046,7 +1086,7 @@
   sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf);
   fp=fopen(fname,"wb");
   if(fp==NULL)return TH_EFAULT;
-  image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(image[0][0]));
+  image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
   png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
   if(png==NULL){
     oc_free_2d(image);

Modified: branches/theora-thusnelda/lib/dec/x86/mmxidct.c
===================================================================
--- branches/theora-thusnelda/lib/dec/x86/mmxidct.c	2009-05-01 15:22:09 UTC (rev 15976)
+++ branches/theora-thusnelda/lib/dec/x86/mmxidct.c	2009-05-02 17:49:35 UTC (rev 15977)
@@ -552,7 +552,6 @@
 void oc_dequant_idct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64],
  int _last_zzi,int _ncoefs,ogg_uint16_t _dc_quant,
  const ogg_uint16_t _ac_quant[64]){
-  int ci;
   /*_last_zzi is subtly different from an actual count of the number of
      coefficients we decoded for this block.
     It contains the value of zzi BEFORE the final token in the block was
@@ -651,5 +650,4 @@
   }
 }
 
-
 #endif

Modified: branches/theora-thusnelda/lib/dec/x86/mmxstate.c
===================================================================
--- branches/theora-thusnelda/lib/dec/x86/mmxstate.c	2009-05-01 15:22:09 UTC (rev 15976)
+++ branches/theora-thusnelda/lib/dec/x86/mmxstate.c	2009-05-02 17:49:35 UTC (rev 15977)
@@ -24,36 +24,35 @@
 
 #if defined(OC_X86_ASM)
 
-void oc_state_frag_recon_mmx(const oc_theora_state *_state,oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_quant,const ogg_uint16_t _ac_quant[64]){
-  ogg_int16_t  __attribute__((aligned(8))) res_buf[64];
-  int dst_framei;
-  int ystride;
+  ogg_int16_t  OC_ALIGN8  res_buf[64];
+  unsigned char          *dst;
+  ptrdiff_t               frag_buf_off;
+  int                     ystride;
+  int                     mb_mode;
   /*Dequantize and apply the inverse transform.*/
   oc_dequant_idct8x8_mmx(res_buf,_dct_coeffs,
    _last_zzi,_ncoefs,_dc_quant,_ac_quant);
   /*Fill in the target buffer.*/
-  dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
-  ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
-  /*For now ystride values in all ref frames assumed to be equal.*/
-  if(_frag->mbmode==OC_MODE_INTRA){
-    oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],ystride,res_buf);
-  }
+  frag_buf_off=_state->frag_buf_offs[_fragi];
+  mb_mode=_state->frags[_fragi].mb_mode;
+  ystride=_state->ref_frame_bufs[0][_pli].stride;
+  dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+  if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,res_buf);
   else{
-    int ref_framei;
-    int mvoffsets[2];
-    ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
+    const unsigned char *ref;
+    int                  mvoffsets[2];
+    ref=
+     _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE[mb_mode]]]
+     +frag_buf_off;
     if(oc_state_get_mv_offsets(_state,mvoffsets,
-     _frag->mv[0],_frag->mv[1],ystride,_pli)>1){
-      oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],
-       _frag->buffer[ref_framei]+mvoffsets[0],
-       _frag->buffer[ref_framei]+mvoffsets[1],ystride,res_buf);
+     _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1],ystride,_pli)>1){
+      oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
+       res_buf);
     }
-    else{
-      oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],
-       _frag->buffer[ref_framei]+mvoffsets[0],ystride,res_buf);
-    }
+    else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,res_buf);
   }
 }
 
@@ -68,20 +67,22 @@
   _src_frame: The reference frame to copy from.
   _pli:       The color plane the fragments lie in.*/
 void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
- const int *_fragis,int _nfragis,int _dst_frame,int _src_frame,int _pli){
-  const int *fragi;
-  const int *fragi_end;
-  int        dst_framei;
-  int        src_framei;
-  int        ystride;
-  dst_framei=_state->ref_frame_idx[_dst_frame];
-  src_framei=_state->ref_frame_idx[_src_frame];
-  ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
-  fragi_end=_fragis+_nfragis;
-  for(fragi=_fragis;fragi<fragi_end;fragi++){
-    oc_fragment *frag;
-    frag=_state->frags+*fragi;
-    OC_FRAG_COPY_MMX(frag->buffer[dst_framei],frag->buffer[src_framei],ystride);
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli){
+  const ptrdiff_t     *frag_buf_offs;
+  const unsigned char *src_frame_data;
+  unsigned char       *dst_frame_data;
+  ptrdiff_t            fragii;
+  int                  ystride;
+  dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]];
+  src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]];
+  ystride=_state->ref_frame_bufs[0][_pli].stride;
+  frag_buf_offs=_state->frag_buf_offs;
+  for(fragii=0;fragii<_nfragis;fragii++){
+    ptrdiff_t frag_buf_off;
+    frag_buf_off=frag_buf_offs[_fragis[fragii]];
+    OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off,
+     src_frame_data+frag_buf_off,ystride);
   }
 }
 
@@ -93,51 +94,58 @@
   _pli:       The color plane to filter.
   _fragy0:    The Y coordinate of the first fragment row to filter.
   _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end){
+void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
   unsigned char OC_ALIGN8  ll[8];
   const th_img_plane      *iplane;
   const oc_fragment_plane *fplane;
-  oc_fragment             *frag_top;
-  oc_fragment             *frag0;
-  oc_fragment             *frag;
-  oc_fragment             *frag_end;
-  oc_fragment             *frag0_end;
-  oc_fragment             *frag_bot;
+  const oc_fragment       *frags;
+  const ptrdiff_t         *frag_buf_offs;
+  unsigned char           *ref_frame_data;
+  ptrdiff_t                fragi_top;
+  ptrdiff_t                fragi_bot;
+  ptrdiff_t                fragi0;
+  ptrdiff_t                fragi0_end;
+  int                      ystride;
+  int                      nhfrags;
   memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll));
   iplane=_state->ref_frame_bufs[_refi]+_pli;
   fplane=_state->fplanes+_pli;
+  nhfrags=fplane->nhfrags;
   /*The following loops are constructed somewhat non-intuitively on purpose.
     The main idea is: if a block boundary has at least one coded fragment on
      it, the filter is applied to it.
     However, the order that the filters are applied in matters, and VP3 chose
      the somewhat strange ordering used below.*/
-  frag_top=_state->frags+fplane->froffset;
-  frag0=frag_top+_fragy0*fplane->nhfrags;
-  frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags;
-  frag_bot=_state->frags+fplane->froffset+fplane->nfrags;
-  while(frag0<frag0_end){
-    frag=frag0;
-    frag_end=frag+fplane->nhfrags;
-    while(frag<frag_end){
-      if(frag->coded){
-        if(frag>frag0){
-          OC_LOOP_FILTER_H_MMX(frag->buffer[_refi],iplane->stride,ll);
+  fragi_top=fplane->froffset;
+  fragi_bot=fragi_top+fplane->nfrags;
+  fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
+  fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
+  ystride=iplane->stride;
+  frags=_state->frags;
+  frag_buf_offs=_state->frag_buf_offs;
+  ref_frame_data=_state->ref_frame_data[_refi];
+  while(fragi0<fragi0_end){
+    ptrdiff_t fragi;
+    ptrdiff_t fragi_end;
+    fragi=fragi0;
+    fragi_end=fragi+nhfrags;
+    while(fragi<fragi_end){
+      if(frags[fragi].coded){
+        unsigned char *ref;
+        ref=ref_frame_data+frag_buf_offs[fragi];
+        if(fragi>fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll);
+        if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll);
+        if(fragi+1<fragi_end&&!frags[fragi+1].coded){
+          OC_LOOP_FILTER_H_MMX(ref+8,ystride,ll);
         }
-        if(frag0>frag_top){
-          OC_LOOP_FILTER_V_MMX(frag->buffer[_refi],iplane->stride,ll);
+        if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
+          OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,ll);
         }
-        if(frag+1<frag_end&&!(frag+1)->coded){
-          OC_LOOP_FILTER_H_MMX(frag->buffer[_refi]+8,iplane->stride,ll);
-        }
-        if(frag+fplane->nhfrags<frag_bot&&!(frag+fplane->nhfrags)->coded){
-          OC_LOOP_FILTER_V_MMX((frag+fplane->nhfrags)->buffer[_refi],
-           iplane->stride,ll);
-        }
       }
-      frag++;
+      fragi++;
     }
-    frag0+=fplane->nhfrags;
+    fragi0+=nhfrags;
   }
 }
 

Modified: branches/theora-thusnelda/lib/dec/x86/x86int.h
===================================================================
--- branches/theora-thusnelda/lib/dec/x86/x86int.h	2009-05-01 15:22:09 UTC (rev 15976)
+++ branches/theora-thusnelda/lib/dec/x86/x86int.h	2009-05-02 17:49:35 UTC (rev 15977)
@@ -32,13 +32,14 @@
 void oc_dequant_idct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64],
  int _last_zzi,int _ncoefs,ogg_uint16_t _dc_quant,
  const ogg_uint16_t _ac_quant[64]);
-void oc_state_frag_recon_mmx(const oc_theora_state *_state,oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_quant,const ogg_uint16_t _ac_quant[64]);
 void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
- const int *_fragis,int _nfragis,int _dst_frame,int _src_frame,int _pli);
-void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end);
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli);
+void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
 void oc_restore_fpu_mmx(void);
 
 #endif

Modified: branches/theora-thusnelda/lib/internal.h
===================================================================
--- branches/theora-thusnelda/lib/internal.h	2009-05-01 15:22:09 UTC (rev 15976)
+++ branches/theora-thusnelda/lib/internal.h	2009-05-02 17:49:35 UTC (rev 15977)
@@ -50,8 +50,7 @@
 
 
 
-typedef struct oc_sb                    oc_sb;
-typedef struct oc_mb                    oc_mb;
+typedef struct oc_sb_flags              oc_sb_flags;
 typedef struct oc_border_info           oc_border_info;
 typedef struct oc_fragment              oc_fragment;
 typedef struct oc_fragment_plane        oc_fragment_plane;
@@ -151,17 +150,7 @@
 
 
 
-/*A map from a super block to fragment numbers.*/
-typedef int         oc_sb_map[4][4];
-/*A map from a macro block to fragment numbers.*/
-typedef int         oc_mb_map[3][4];
-/*A motion vector.*/
-typedef signed char oc_mv[2];
-
-
-
-/*Super block information.
-  Super blocks are 32x32 segments of pixels in a single color plane indexed
+/*Super blocks are 32x32 segments of pixels in a single color plane indexed
    in image order.
   Internally, super blocks are broken up into four quadrants, each of which
    contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
@@ -172,37 +161,42 @@
    the regular image order indexing strategy, blocks indexed in image order
    are called "fragments".
   Fragments are indexed in image order, left to right, then bottom to top,
-   from Y plane to Cb plane to Cr plane.*/
-struct oc_sb{
-  unsigned  coded_fully:1;
-  unsigned  coded_partially:1;
-  unsigned  quad_valid:4;
-  oc_sb_map map;
-};
+   from Y' plane to Cb plane to Cr plane.
 
+  The co-located fragments in all image planes corresponding to the location
+   of a single quadrant of a luma plane super block form a macro block.
+  Thus there is only a single set of macro blocks for all planes, each of which
+   contains between 6 and 12 fragments, depending on the pixel format.
+  Therefore macro block information is kept in a separate set of arrays from
+   super blocks to avoid unused space in the other planes.
+  The lists are indexed in super block order.
+  That is, the macro block corresponding to the macro block mbi in (luma plane)
+   super block sbi is at index (sbi<<2|mbi).
+  Thus the number of macro blocks in each dimension is always twice the number
+   of super blocks, even when only an odd number fall inside the coded frame.
+  These "extra" macro blocks are just an artifact of our internal data layout,
+   and not part of the coded stream; they are flagged with a negative MB mode.*/
 
 
-/*Macro block information.
-  The co-located fragments in all image planes corresponding to the location of
-   a single luma plane super block quadrant forms a macro block.
-  Thus there is only a single set of macro blocks for all planes, which
-   contains between 6 and 12 fragments, depending on the pixel format.
-  Therefore macro block information is kept in a separate array from super
-   blocks, to avoid unused space in the other planes.*/
-struct oc_mb{
-  /*The current macro block mode.
-    A negative number indicates the macro block lies entirely outside the
-     coded frame.*/
-  int           mode;
-  /*The X location of the macro block's upper-left hand pixel.*/
-  int           x;
-  /*The Y location of the macro block's upper-right hand pixel.*/
-  int           y;
-  /*The fragments that belong to this macro block in each color plane.
-    Fragments are stored in image order (left to right then top to bottom).
-    When chroma components are decimated, the extra fragments have an index of
-     -1.*/
-  oc_mb_map     map;
+
+/*A single quadrant of the map from a super block to fragment numbers.*/
+typedef ptrdiff_t       oc_sb_map_quad[4];
+/*A map from a super block to fragment numbers.*/
+typedef oc_sb_map_quad  oc_sb_map[4];
+/*A single plane of the map from a macro block to fragment numbers.*/
+typedef ptrdiff_t       oc_mb_map_plane[4];
+/*A map from a macro block to fragment numbers.*/
+typedef oc_mb_map_plane oc_mb_map[3];
+/*A motion vector.*/
+typedef signed char     oc_mv[2];
+
+
+
+/*Super block information.*/
+struct oc_sb_flags{
+  unsigned char coded_fully:1;
+  unsigned char coded_partially:1;
+  unsigned char quad_valid:4;
 };
 
 
@@ -227,35 +221,27 @@
 /*Fragment information.*/
 struct oc_fragment{
   /*A flag indicating whether or not this fragment is coded.*/
-  unsigned        coded:1;
-  /*A flag indicating that all of this fragment lies outside the displayable
+  unsigned   coded:1;
+  /*A flag indicating that this entire fragment lies outside the displayable
      region of the frame.
     Note the contrast with an invalid macro block, which is outside the coded
-     frame, not just the displayable one.*/
-  unsigned        invalid:1;
+     frame, not just the displayable one.
+    There are no fragments outside the coded frame by construction.*/
+  unsigned   invalid:1;
   /*The quality index used for this fragment's AC coefficients.*/
-  unsigned        qi:6;
-  /*The mode of the macroblock this fragment belongs to.
+  unsigned   qi:6;
+  /*The mode of the macroblock this fragment belongs to.*/
+  unsigned   mb_mode:3;
+  /*The index of the associated border information for fragments which lie
+     partially outside the displayable region.
+    For fragments completely inside or outside this region, this is -1.
     Note that the C standard requires an explicit signed keyword for bitfield
      types, since some compilers may treat them as unsigned without it.*/
-  signed int      mbmode:8;
+  signed int borderi:5;
   /*The prediction-corrected DC component.
     Note that the C standard requires an explicit signed keyword for bitfield
      types, since some compilers may treat them as unsigned without it.*/
-  signed int      dc:16;
-  /*A pointer to the portion of an image covered by this fragment in several
-     images.
-    The first three are reconstructed frame buffers, while the last is the
-     input image buffer.
-    The appropriate stride value is determined by the color plane the fragment
-     belongs in.*/
-  unsigned char  *buffer[4];
-  /*Information for fragments which lie partially outside the displayable
-     region.
-    For fragments completely inside or outside this region, this is NULL.*/
-  oc_border_info *border;
-  /*The motion vector used for this fragment.*/
-  oc_mv           mv;
+  signed int dc:16;
 };
 
 
@@ -263,21 +249,21 @@
 /*A description of each fragment plane.*/
 struct oc_fragment_plane{
   /*The number of fragments in the horizontal direction.*/
-  int nhfrags;
+  int       nhfrags;
   /*The number of fragments in the vertical direction.*/
-  int nvfrags;
+  int       nvfrags;
   /*The offset of the first fragment in the plane.*/
-  int froffset;
+  ptrdiff_t froffset;
   /*The total number of fragments in the plane.*/
-  int nfrags;
+  ptrdiff_t nfrags;
   /*The number of super blocks in the horizontal direction.*/
-  int nhsbs;
+  unsigned  nhsbs;
   /*The number of super blocks in the vertical direction.*/
-  int nvsbs;
+  unsigned  nvsbs;
   /*The offset of the first super block in the plane.*/
-  int sboffset;
+  unsigned  sboffset;
   /*The total number of super blocks in the plane.*/
-  int nsbs;
+  unsigned  nsbs;
 };
 
 
@@ -295,13 +281,14 @@
   void (*dequant_idct8x8)(ogg_int16_t _y[64],const ogg_int16_t _x[64],
    int _last_zzi,int _ncoefs,ogg_uint16_t _dc_quant,
    const ogg_uint16_t _ac_quant[64]);
-  void (*state_frag_recon)(const oc_theora_state *_state,oc_fragment *_frag,
-   int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+  void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
+   int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
    ogg_uint16_t _dc_quant,const ogg_uint16_t _ac_quant[64]);
   void (*state_frag_copy_list)(const oc_theora_state *_state,
-   const int *_fragis,int _nfragis,int _dst_frame,int _src_frame,int _pli);
-  void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,int *_bv,
-   int _refi,int _pli,int _fragy0,int _fragy_end);  
+   const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+   int _dst_frame,int _src_frame,int _pli);
+  void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,
+   int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);  
   void (*restore_fpu)(void);
 };
 
@@ -310,78 +297,87 @@
 /*Common state information between the encoder and decoder.*/
 struct oc_theora_state{
   /*The stream information.*/
-  th_info             info;
+  th_info                     info;
   /*Table for shared accelerated functions.*/
-  oc_base_opt_vtable  opt_vtable;
+  oc_base_opt_vtable          opt_vtable;
   /*CPU flags to detect the presence of extended instruction sets.*/
-  ogg_uint32_t        cpu_flags;
+  ogg_uint32_t                cpu_flags;
   /*The fragment plane descriptions.*/
-  oc_fragment_plane   fplanes[3];
-  /*The total number of fragments in a single frame.*/
-  int                 nfrags;
+  oc_fragment_plane           fplanes[3];
   /*The list of fragments, indexed in image order.*/
-  oc_fragment        *frags;
+  oc_fragment                *frags;
+  /*The offset into the reference frame buffer to the upper-left pixel of
+     each fragment.*/
+  ptrdiff_t                  *frag_buf_offs;
+  /*The motion vector for each fragment.*/
+  oc_mv                      *frag_mvs;
+  /*The total number of fragments in a single frame.*/
+  ptrdiff_t                   nfrags;
+  /*The list of super block maps, indexed in image order.*/
+  oc_sb_map                  *sb_maps;
+  /*The list of super block flags, indexed in image order.*/
+  oc_sb_flags                *sb_flags;
   /*The total number of super blocks in a single frame.*/
-  int                 nsbs;
-  /*The list of super blocks, indexed in image order.*/
-  oc_sb              *sbs;
+  unsigned                    nsbs;
   /*The number of macro blocks in the X direction.*/
-  int                 nhmbs;
+  unsigned                    nhmbs;
   /*The number of macro blocks in the Y direction.*/
-  int                 nvmbs;
+  unsigned                    nvmbs;
   /*The total number of macro blocks.*/
-  int                 nmbs;
-  /*The list of macro blocks, indexed in super block order.
-    That is, the macro block corresponding to the macro block mbi in (luma
-     plane) super block sbi is (sbi<<2|mbi).*/
-  oc_mb              *mbs;
+  size_t                      nmbs;
+  /*The fragments from each color plane that belong to each macro block.
+    Fragments are stored in image order (left to right then top to bottom).
+    When chroma components are decimated, the extra fragments have an index of
+     -1.*/
+  oc_mb_map                  *mb_maps;
+  /*The list of macro block modes.
+    A negative number indicates the macro block lies entirely outside the
+     coded frame.*/
+  signed char                *mb_modes;
   /*The list of coded fragments, in coded order.*/
-  int                *coded_fragis;
+  ptrdiff_t                  *coded_fragis;
   /*The number of coded fragments in each plane.*/
-  int                 ncoded_fragis[3];
+  ptrdiff_t                   ncoded_fragis[3];
   /*The list of uncoded fragments.
     This just past the end of the list, which is in reverse order, and
      uses the same block of allocated storage as the coded_fragis list.*/
-  int                *uncoded_fragis;
+  ptrdiff_t                  *uncoded_fragis;
   /*The number of uncoded fragments in each plane.*/
-  int                 nuncoded_fragis[3];
+  ptrdiff_t                   nuncoded_fragis[3];
   /*The list of coded macro blocks in the Y plane, in coded order.*/
-  int                *coded_mbis;
+  unsigned                   *coded_mbis;
   /*The number of coded macro blocks in the Y plane.*/
-  int                 ncoded_mbis;
-  /*A copy of the image data used to fill the input pointers in each fragment.
-    If the data pointers or strides change, these input pointers must be
-     re-populated.*/
-  th_ycbcr_buffer     input;
+  size_t                      ncoded_mbis;
   /*The number of unique border patterns.*/
-  int                 nborders;
-  /*The storage for the border info for all border fragments.
-    This data is pointed to from the appropriate fragments.*/
-  oc_border_info      borders[16];
+  int                         nborders;
+  /*The unique border patterns for all border fragments.
+    The borderi field of fragments which straddle the border indexes this
+     list.*/
+  oc_border_info              borders[16];
   /*The index of the buffers being used for each OC_FRAME_* reference frame.*/
-  int                 ref_frame_idx[3];
+  int                         ref_frame_idx[3];
   /*The actual buffers used for the previously decoded frames.*/
-  th_ycbcr_buffer     ref_frame_bufs[3];
+  th_ycbcr_buffer             ref_frame_bufs[3];
   /*The storage for the reference frame buffers.*/
-  unsigned char      *ref_frame_data;
+  unsigned char              *ref_frame_data[3];
   /*The frame number of the last keyframe.*/
-  ogg_int64_t         keyframe_num;
+  ogg_int64_t                 keyframe_num;
   /*The frame number of the current frame.*/
-  ogg_int64_t         curframe_num;
+  ogg_int64_t                 curframe_num;
   /*The granpos of the current frame.*/
-  ogg_int64_t         granpos;
+  ogg_int64_t                 granpos;
   /*The type of the current frame.*/
-  int                 frame_type;
+  int                         frame_type;
   /*The quality indices of the current frame.*/
-  int                 qis[3];
+  unsigned char               qis[3];
   /*The number of quality indices used in the current frame.*/
-  int                 nqis;
+  unsigned char               nqis;
   /*The dequantization tables.
     Note that these are stored in zig-zag order.*/
-  oc_quant_table     *dequant_tables[2][3];
-  oc_quant_tables     dequant_table_data[2][3];
+  oc_quant_table             *dequant_tables[2][3];
+  oc_quant_tables OC_ALIGN16  dequant_table_data[2][3];
   /*Loop filter strength parameters.*/
-  unsigned char       loop_filter_limits[64];
+  unsigned char               loop_filter_limits[64];
 };
 
 
@@ -398,25 +394,22 @@
 
 
 /*A map from the index in the zig zag scan to the coefficient number in a
-   block.
-  The extra 64 entries send out of bounds indexes to index 64.
-  This is used to safely ignore invalid zero runs when decoding
-   coefficients.*/
-extern const int OC_FZIG_ZAG[128];
+   block.*/
+extern const unsigned char OC_FZIG_ZAG[64];
 /*A map from the coefficient number in a block to its index in the zig zag
    scan.*/
-extern const int OC_IZIG_ZAG[64];
+extern const unsigned char OC_IZIG_ZAG[64];
 /*The predictor frame to use for each macro block mode.*/
-extern const int OC_FRAME_FOR_MODE[OC_NMODES];
+extern const unsigned char OC_FRAME_FOR_MODE[OC_NMODES];
 /*A map from physical macro block ordering to bitstream macro block
    ordering within a super block.*/
-extern const int OC_MB_MAP[2][2];
-/*A list of the indices in the oc_mb.map array that can be valid for each of
+extern const unsigned char OC_MB_MAP[2][2];
+/*A list of the indices in the oc_mb_map array that can be valid for each of
    the various chroma decimation types.*/
-extern const int OC_MB_MAP_IDXS[TH_PF_NFORMATS][12];
-/*The number of indices in the oc_mb.map array that can be valid for each of
+extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12];
+/*The number of indices in the oc_mb_map array that can be valid for each of
    the various chroma decimation types.*/
-extern const int OC_MB_MAP_NIDXS[TH_PF_NFORMATS];
+extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS];
 /*A table of functions used to fill in the Cb,Cr plane motion vectors for a
    macro block when 4 different motion vectors are specified in the luma
    plane.*/
@@ -432,7 +425,7 @@
 void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
  const th_ycbcr_buffer _src);
 
-int oc_dct_token_skip(int _token,int _extra_bits);
+ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits);
 
 int oc_frag_pred_dc(const oc_fragment *_frag,
  const oc_fragment_plane *_fplane,int _x,int _y,int _pred_last[3]);
@@ -470,13 +463,14 @@
 void oc_dequant_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],
  const ogg_int16_t _x[64],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_quant,const ogg_uint16_t _ac_quant[64]);
-void oc_state_frag_recon(const oc_theora_state *_state,oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_quant,const ogg_uint16_t _ac_quant[64]);
-void oc_state_frag_copy_list(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli);
-void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_state_frag_copy_list(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli);
+void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
 void oc_restore_fpu(const oc_theora_state *_state);
 
 /*Default pure-C implementations.*/
@@ -491,13 +485,14 @@
 void oc_dequant_idct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64],
  int _last_zzi,int _ncoefs,ogg_uint16_t _dc_quant,
  const ogg_uint16_t _ac_quant[64]);
-void oc_state_frag_recon_c(const oc_theora_state *_state,oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_quant,const ogg_uint16_t _ac_quant[64]);
-void oc_state_frag_copy_list_c(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli);
-void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_state_frag_copy_list_c(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli);
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
 void oc_restore_fpu_c(void);
 
 /*We need a way to call a few encoder functions without introducing a link-time
@@ -508,8 +503,8 @@
   We do a similar thing for the decoder in case we ever decide to split off a
    common base library.*/
 typedef void (*oc_state_clear_func)(theora_state *_th);
-typedef int (*oc_state_control_func)(theora_state *th,int req,
- void *buf,size_t buf_sz);
+typedef int (*oc_state_control_func)(theora_state *th,int _req,
+ void *_buf,size_t _buf_sz);
 typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
  ogg_int64_t _granulepos);
 typedef double (*oc_state_granule_time_func)(theora_state *_th,



More information about the commits mailing list