[xiph-commits] r10030 - in experimental/derf/theora-exp: examples include/theora lib unix win32/msvc60

Sat Sep 17 17:58:25 PDT 2005

Author: tterribe
Date: 2005-09-17 17:58:06 -0700 (Sat, 17 Sep 2005)
New Revision: 10030

Added:
   experimental/derf/theora-exp/lib/encmsc.c
   experimental/derf/theora-exp/lib/encvbr.c
   experimental/derf/theora-exp/lib/encvbr.h
Modified:
   experimental/derf/theora-exp/examples/dump_video.c
   experimental/derf/theora-exp/include/theora/codec.h
   experimental/derf/theora-exp/include/theora/theoraenc.h
   experimental/derf/theora-exp/lib/bitrate.c
   experimental/derf/theora-exp/lib/decode.c
   experimental/derf/theora-exp/lib/encint.h
   experimental/derf/theora-exp/lib/encode.c
   experimental/derf/theora-exp/lib/fdct.c
   experimental/derf/theora-exp/lib/fdct.h
   experimental/derf/theora-exp/lib/huffdec.c
   experimental/derf/theora-exp/lib/impmap.c
   experimental/derf/theora-exp/lib/mcenc.c
   experimental/derf/theora-exp/lib/ocintrin.h
   experimental/derf/theora-exp/lib/psych.c
   experimental/derf/theora-exp/lib/psych.h
   experimental/derf/theora-exp/unix/Makefile
   experimental/derf/theora-exp/win32/msvc60/dump_video.dsp
   experimental/derf/theora-exp/win32/msvc60/encoder_example.dsp
   experimental/derf/theora-exp/win32/msvc60/theorabase_static.dsp
   experimental/derf/theora-exp/win32/msvc60/theoradec_static.dsp
   experimental/derf/theora-exp/win32/msvc60/theoraenc_static.dsp
Log:
Encoder architecture improvements.
- Reconfigurable encoder pipeline elements have been added.
  Still more work to be done on actually interleaving operations,
   but the framework is there.
- A theora_encode_ctl() API for setting different encoding modes.
  More modes need to be added (e.g., CBR).
- All VBR-specific code has been broken into its own module.
  This gives it a cleaner separation from the packet assembly,
    etc., code, and lets you see what parts need to be
    re-implemented for a new encoding mode.

No new encoding modes have actually been added yet, and the VBR
 mode is still not usable, though two small bug fixes are
 included: the per-block QI values for I frames were being set
 before the quantizers for the frame were actually chosen, and
 the lowest contrast level was always being used in psych.c.
dump_video.c has also been fixed to be C90 compliant again.
In addition there were some minor documentation clean-ups, and
 with my laptop dead I took the opportunity to update the Win32
 project files.

Modified: experimental/derf/theora-exp/examples/dump_video.c
===================================================================

--- experimental/derf/theora-exp/examples/dump_video.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/examples/dump_video.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -174,10 +174,11 @@
   int long_option_index;
   int c;
 
+  int frames = 0;
+
   FILE *infile = stdin;
   outfile = stdout;
 
-  int frames = 0;
 
 #ifdef _WIN32 /* We need to set stdin/stdout to binary mode. Damn windows. */
   /* Beware the evil ifdef. We avoid these where we can, but this one we
@@ -345,7 +346,7 @@
         if(theora_decode_packetin(td,&op,&videobuf_granulepos)>=0){
           videobuf_time=theora_granule_time(td,videobuf_granulepos);
           videobuf_ready=1;
-	  frames++;
+          frames++;
         }
 
       }else

Modified: experimental/derf/theora-exp/include/theora/codec.h
===================================================================
--- experimental/derf/theora-exp/include/theora/codec.h	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/include/theora/codec.h	2005-09-18 00:58:06 UTC (rev 10030)
@@ -151,6 +151,8 @@
  *  the initial 'info' header packet.
  * To initialize an encoder, the application fills in this structure and
  *  passes it to theora_encode_alloc().
+ * A default encoding mode is chosen based on the values of the #quality and
+ *  #target_bitrate fields.
  * On decode, it is filled in by theora_decode_headerin(), and then passed to
  *  theora_decode_alloc().
  * 
@@ -228,12 +230,18 @@
   theora_colorspace  colorspace;
   /**The pixel format.*/
   theora_pixel_fmt   pixel_fmt;
-  /**The target bit-rate in bits per second. */
+  /**The target bit-rate in bits per second.
+     If initializing an encoder with this struct, set this field to a non-zero
+      value to activate CBR encoding by default.*/
   /*TODO: Current encoder does not support CBR mode, or anything like it.
     We also don't really know what nominal rate each quality level
      corresponds to yet.*/
   int                target_bitrate;
-  /**The target quality level.*/
+  /**The target quality level.
+     Valid values range from 0 to 63, inclusive, with higher values giving
+      higher quality.
+     If initializing an encoder with this struct, and #target_bitrate is set
+      to zero, VBR encoding at this quality will be activated by default.*/
   /*Currently this is set so that a qi of 0 corresponds to distortions of 24
      times the JND, and each increase by 16 halves that value.
     This gives us fine discrimination at low qualities, yet effective rate

Modified: experimental/derf/theora-exp/include/theora/theoraenc.h
===================================================================
--- experimental/derf/theora-exp/include/theora/theoraenc.h	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/include/theora/theoraenc.h	2005-09-18 00:58:06 UTC (rev 10030)
@@ -29,6 +29,53 @@
 
 
 
+/**The configuration information for the VBR encoding mode.
+ * This mode attempts to encode the video with a constant psychovisual
+ *  quality.
+ * It can be enabled by calling theora_encode_ctl() with #OC_ENCCTL_SETUP_VBR.
+ * See the #theora_info struct documentation for details on how the default
+ *  encoding mode is chosen.*/
+typedef struct theora_vbr_cfg{
+  /**The target quality index.
+   * Valid values range from 0 to 63, inclusive, with higher values giving
+   *  higher quality.
+   * Note that, in this case, this corresponds to a <em>perceptual</em>
+   *  quality, and does not translate directly into a quantization setting.
+   * Limits on the admissible quantizers can be controlled below.*/
+  int qi;
+  /**The minimum quality to use for a keyframe.
+   * Valid values range from 0 to 63, inclusive, with higher values giving
+   *  higher quality.*/
+  int kf_qi_min;
+  /**The maximum quality to use for a keyframe.
+   * Valid values range from 0 to 63, inclusive, with higher values giving
+   *  higher quality.
+   * This must be at least as large as #kf_qi_min.*/
+  int kf_qi_max;
+  /**The minimum quality to use for a delta frame.
+   * Valid values range from 0 to 63, inclusive, with higher values giving
+   *  higher quality.*/
+  int df_qi_min;
+  /**The maximum quality to use for a delta frame.
+   * Valid values range from 0 to 63, inclusive, with higher values giving
+   *  higher quality.
+   * This must be at least as large as #df_qi_min.*/
+  int df_qi_max;
+}theora_vbr_cfg;
+
+/**The configuration information for the constant QI encoding mode.
+ * This mode encodes the video with a constant quality index.
+ * This is the fastest encoding mode.
+ * It can be enabled by calling theora_encode_ctl() with #OC_ENCCTL_SETUP_CQI.
+ * See the #theora_info struct documentation for details on how the default
+ *  encoding mode is chosen.*/
+typedef struct theora_cqi_cfg{
+  /**The target quality index.
+     Valid values range from 0 to 63, inclusive, with higher values giving
+      higher quality.*/
+  int qi;
+}theora_cqi_cfg;
+
 /**\name theora_encode_ctl() codes
  * \anchor encctlcodes
  * These are the available request codes for theora_encode_ctl().
@@ -41,7 +88,7 @@
  *  this call.
  * <tt>NULL</tt> may be specified to revert to the default tables.
  *
- * \param[in] _buf <tt>theora_huff_code[#OC_NHUFFMAN_TABLES][#OC_NDCT_TOKENS]</tt>
+ * \param[in] _buf <tt>#theora_huff_code[#OC_NHUFFMAN_TABLES][#OC_NDCT_TOKENS]</tt>
  * \retval OC_FAULT  \a _enc_ctx is <tt>NULL</tt>.
  * \retval OC_EINVAL Encoding has already begun or one or more of the given
  *                     tables is not full or prefix-free, \a _buf is
@@ -88,7 +135,7 @@
  *  when any of the luma blocks in a macro block are not coded.
  * It also includes using the VP3 quantization tables and Huffman codes; if you
  *  set them explicitly after calling this function, the resulting stream will
- *  not by VP3-compatible.
+ *  not be VP3-compatible.
  * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source
  *  material, or when using a picture region smaller than the full frame (e.g.
  *  a non-multiple-of-16 width or height), then non-VP3 bitstream features will
@@ -102,7 +149,7 @@
  *                   or 0 to disable it (the default).
  * \param[out] _buf <tt>int</tt>: 1 if all bitstream features required for
  *                   VP3-compatibility could be set, and 0 otherwise.
- *                  The latter will be returned if with pixel format is not
+ *                  The latter will be returned if the pixel format is not
  *                   4:2:0, the picture region is smaller than the full frame,
  *                   or if encoding has begun, preventing the quantization
  *                   tables and codebooks from being set.
@@ -110,6 +157,69 @@
  * \retval OC_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
  * \retval OC_IMPL   Not supported by this implementation.*/
 #define OC_ENCCTL_SET_VP3_COMPATIBLE (10)
+/**Gets the maximum speed level.
+ * Higher speed levels favor quicker encoding over better quality per bit.
+ * Depending on the encoding mode, and the internal algorithms used, quality
+ *  may actually improve, but bitrate will also increase, and overall
+ *  rate/distortion performance will likely decrease.
+ * The maximum value, and the meaning of each value, may change depending on
+ *  the current encoding mode (VBR vs. CQI, etc.).
+ *
+ * \param[out] _buf int: The maximum encoding speed level.
+ * \retval OC_FAULT  \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define OC_ENCCTL_GET_SPLEVEL_MAX (12)
+/**Sets the speed level.
+ * By default, the slowest speed (0) is used.
+ *
+ * \param[in] _buf int: The new encoding speed level.
+ *                      0 is slowest, larger values use less CPU.
+ * \retval OC_FAULT  \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                    encoding speed level is out of bounds.
+ *                   The maximum encoding speed level may be
+ *                    implementation- and encoding mode-specific, and can be
+ *                    obtained via #OC_ENCCTL_GET_SPLEVEL_MAX.
+ * \retval OC_IMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define OC_ENCCTL_SET_SPLEVEL (14)
+/**Puts the encoder in VBR mode.
+ * This can be done at any time during the encoding process, with different
+ *  configuration parameters, to encode different regions of the video segment
+ *  with different qualities.
+ * See the #theora_info struct documentation for details on how the default
+ *  encoding mode is chosen.
+ *
+ * \param[in] _buf <tt>#theora_vbr_cfg</tt>: the configuration parameters.
+ *                 This may be <tt>NULL</tt>, in which case the current VBR
+ *                  configuration is unchanged.
+ *                 The default is to use the QI setting passed in via the
+ *                  #theora_info struct when the encoder was initialized, with
+ *                  a full range of admissible quantizers.
+ * \retval OC_FAULT  \a _enc_ctx is <tt>NULL</tt>.
+ * \retval OC_EINVAL The configuration parameters do not meet one of their
+ *                    stated requirements, \a _buf is <tt>NULL</tt> and
+ *                    \a _buf_sz is not zero, or \a _buf is non-<tt>NULL</tt>
+ *                    and \a _buf_sz is not <tt>sizeof(#theora_vbr_cfg)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
+#define OC_ENCCTL_SETUP_VBR (16)
+/**Puts the encoder in CQI mode.
+ * This can be done at any time during the encoding process, with different QI
+ *  values.
+ * See the #theora_info struct documentation for details on how the default
+ *  encoding mode is chosen.
+ *
+ * \param[in] _buf <tt>#theora_cqi_cfg</tt>: the configuration parameters.
+ *                 This may be <tt>NULL</tt>, in which case the current CQI
+ *                  configuration is unchanged.
+ *                 The default is to use the QI setting passed in via the
+ *                  #theora_info struct when the encoder was initialized.
+ * \retval OC_FAULT  \a _enc_ctx is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a _buf_sz is not <tt>sizeof(#theora_cqi_cfg)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
+#define OC_ENCCTL_SETUP_CQI (18)
 /*@}*/
 
 

Modified: experimental/derf/theora-exp/lib/bitrate.c
===================================================================
--- experimental/derf/theora-exp/lib/bitrate.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/bitrate.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -8197,9 +8197,13 @@
   }
 };
 
+ogg_uint16_t OC_RES_DISTORTS[64][3][OC_NMODES][16];
+
 #if defined(OC_BITRATE_STATS)
 static ogg_int64_t OC_RES_BITRATE_ACCUM[64][3][OC_NMODES][16];
 static int         OC_RES_BITRATE_SAMPLES[64][3][OC_NMODES][16];
+static ogg_int64_t OC_RES_DISTORT_ACCUM[64][3][OC_NMODES][16];
+static ogg_int64_t OC_RES_DISTORT_SAMPLES[64][3][OC_NMODES][16];
 
 #include <stdio.h>
 
@@ -8209,6 +8213,8 @@
   if(in==NULL)return;
   fread(OC_RES_BITRATE_ACCUM,sizeof(OC_RES_BITRATE_ACCUM),1,in);
   fread(OC_RES_BITRATE_SAMPLES,sizeof(OC_RES_BITRATE_SAMPLES),1,in);
+  fread(OC_RES_DISTORT_ACCUM,sizeof(OC_RES_DISTORT_ACCUM),1,in);
+  fread(OC_RES_DISTORT_SAMPLES,sizeof(OC_RES_DISTORT_SAMPLES),1,in);
   /*Update the current bitrate statistics in use.*/
   {
     int qi;
@@ -8221,9 +8227,16 @@
           for(erri=0;erri<16;erri++){
             int n;
             n=OC_RES_BITRATE_SAMPLES[qi][pli][modei][erri];
-            if(!n)continue;
-            OC_RES_BITRATES[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
-             ((OC_RES_BITRATE_ACCUM[qi][pli][modei][erri]<<1)+n)/(n<<1));
+            if(n!=0){
+              OC_RES_BITRATES[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
+               ((OC_RES_BITRATE_ACCUM[qi][pli][modei][erri]<<1)+n)/(n<<1));
+            }
+            n=OC_RES_DISTORT_SAMPLES[qi][pli][modei][erri];
+            if(n!=0){
+              OC_RES_DISTORTS[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
+               ((OC_RES_DISTORT_ACCUM[qi][pli][modei][erri]<<OC_DIS_SCALE+1)+n)/
+               (n<<1));
+            }
           }
         }
       }
@@ -8236,6 +8249,8 @@
   out=fopen("modedec.stats","wb");
   fwrite(OC_RES_BITRATE_ACCUM,sizeof(OC_RES_BITRATE_ACCUM),1,out);
   fwrite(OC_RES_BITRATE_SAMPLES,sizeof(OC_RES_BITRATE_SAMPLES),1,out);
+  fwrite(OC_RES_DISTORT_ACCUM,sizeof(OC_RES_DISTORT_ACCUM),1,out);
+  fwrite(OC_RES_DISTORT_SAMPLES,sizeof(OC_RES_DISTORT_SAMPLES),1,out);
 }
 
 void oc_bitrate_update_stats(oc_enc_ctx *_enc,int _huff_idxs[5][3]){
@@ -8251,6 +8266,7 @@
   };
   ogg_uint32_t  eob_bits[64];
   int           eob_runs[64];
+  int           ref_idx;
   int           pli;
   int          *coded_fragi;
   int          *coded_fragi_end;
@@ -8330,8 +8346,13 @@
      bit count per block.*/
   /*Go through the actual encoded tokens and assign the bits used by each to
      the fragment(s) they came from.*/
+  ref_idx=_enc->state.ref_frame_idx[OC_FRAME_SELF];
   coded_fragi_end=coded_fragi=_enc->state.coded_fragis;
   for(pli=0;pli<3;pli++){
+    int ref_ystride;
+    int cur_ystride;
+    cur_ystride=_enc->state.input[pli].ystride;
+    ref_ystride=_enc->state.ref_frame_bufs[ref_idx][pli].ystride;
     coded_fragi_end+=_enc->state.ncoded_fragis[pli];
     for(;coded_fragi<coded_fragi_end;coded_fragi++){
       oc_fragment *frag;
@@ -8375,10 +8396,43 @@
         }
         err_bin=efrag->eerror>>(frag->mbmode==OC_MODE_INTRA?8:6);
         err_bin=OC_MINI(15,err_bin);
-        OC_RES_BITRATE_ACCUM[
-         frag->invalid?0:frag->qi][pli][frag->mbmode][err_bin]+=frag_bits;
-        OC_RES_BITRATE_SAMPLES[
-         frag->invalid?0:frag->qi][pli][frag->mbmode][err_bin]++;
+        if(!frag->invalid){
+          unsigned char *cur;
+          unsigned char *ref;
+          int            err;
+          int            i;
+          int            j;
+          OC_RES_BITRATE_ACCUM[frag->qi][pli][frag->mbmode][err_bin]+=frag_bits;
+          OC_RES_BITRATE_SAMPLES[frag->qi][pli][frag->mbmode][err_bin]++;
+          err=0;
+          cur=frag->buffer[OC_FRAME_IO];
+          ref=frag->buffer[ref_idx];
+          if(frag->border!=NULL){
+            ogg_int64_t mask;
+            mask=frag->border->mask;
+            for(i=0;i<8;i++){
+              for(j=0;j<8;j++){
+                if(mask&1){
+                  err+=abs(cur[j]-ref[j]);
+                }
+                mask>>=1;
+              }
+              cur+=cur_ystride;
+              ref+=ref_ystride;
+            }
+            OC_RES_DISTORT_SAMPLES[frag->qi][pli][frag->mbmode][err_bin]+=
+             frag->border->npixels;
+          }
+          else{
+            for(i=0;i<8;i++){
+              for(j=0;j<8;j++)err+=abs(cur[j]-ref[j]);
+              cur+=cur_ystride;
+              ref+=ref_ystride;
+            }
+            OC_RES_DISTORT_SAMPLES[frag->qi][pli][frag->mbmode][err_bin]+=64;
+          }
+          OC_RES_DISTORT_ACCUM[frag->qi][pli][frag->mbmode][err_bin]+=err;
+        }
       }
     }
   }
@@ -8393,9 +8447,16 @@
           for(erri=0;erri<16;erri++){
             int n;
             n=OC_RES_BITRATE_SAMPLES[qi][pli][modei][erri];
-            if(!n)continue;
-            OC_RES_BITRATES[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
-             ((OC_RES_BITRATE_ACCUM[qi][pli][modei][erri]<<1)+n)/(n<<1));
+            if(n!=0){
+              OC_RES_BITRATES[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
+               ((OC_RES_BITRATE_ACCUM[qi][pli][modei][erri]<<1)+n)/(n<<1));
+            }
+            n=OC_RES_DISTORT_SAMPLES[qi][pli][modei][erri];
+            if(n!=0){
+              OC_RES_DISTORTS[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
+               ((OC_RES_DISTORT_ACCUM[qi][pli][modei][erri]<<OC_DIS_SCALE+1)+n)/
+               (n<<1));
+            }
           }
         }
       }

Modified: experimental/derf/theora-exp/lib/decode.c
===================================================================
--- experimental/derf/theora-exp/lib/decode.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/decode.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -1260,7 +1260,7 @@
     size_t frame_sz;
     frame_sz=_dec->state.info.frame_width*_dec->state.info.frame_height;
     if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
-      _dec->variances=(ogg_uint32_t *)_ogg_realloc(_dec->variances,
+      _dec->variances=(int *)_ogg_realloc(_dec->variances,
        _dec->state.fplanes[0].nfrags*sizeof(_dec->variances[0]));
       _dec->pp_frame_data=(unsigned char *)_ogg_realloc( 
        _dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0]));
@@ -1474,6 +1474,8 @@
     zzi=OC_MINI(zzi,64);
     dct_coeffs[0]=(ogg_int16_t)frag->dc;
     iquants=_dec->state.dequant_tables[frag->mbmode!=OC_MODE_INTRA][_pli];
+    /*last_zzi is always initialized.
+      If your compiler thinks otherwise, it is dumb.*/
     oc_state_frag_recon(&_dec->state,frag,_pli,dct_coeffs,last_zzi,zzi,
      iquants[_dec->state.qis[0]][0],iquants[frag->qi]);
   }
@@ -2103,13 +2105,13 @@
     _dec->state.curframe_num++;
     if(_granpos!=NULL)*_granpos=_dec->state.granpos;
   }
+#if defined(OC_DUMP_IMAGES)
+  oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
+#endif
   return 0;
 }
 
 int theora_decode_ycbcr_out(theora_dec_ctx *_dec,theora_ycbcr_buffer _ycbcr){
   oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
-#if defined(OC_DUMP_IMAGES)
-  oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
-#endif
   return 0;
 }

Modified: experimental/derf/theora-exp/lib/encint.h
===================================================================
--- experimental/derf/theora-exp/lib/encint.h	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/encint.h	2005-09-18 00:58:06 UTC (rev 10030)
@@ -4,12 +4,12 @@
 # include "theora/theoraenc.h"
 # include "internal.h"
 
+typedef struct oc_enc_pipe_stage      oc_enc_pipe_stage;
 typedef struct oc_fragment_enc_info   oc_fragment_enc_info;
 typedef struct oc_mb_enc_info         oc_mb_enc_info;
 typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
-typedef struct oc_impmap_ctx          oc_impmap_ctx;
+typedef struct oc_enc_vbr_ctx         oc_enc_vbr_ctx;
 typedef struct oc_mcenc_ctx           oc_mcenc_ctx;
-typedef struct oc_psych_ctx           oc_psych_ctx;
 typedef struct theora_enc_ctx         oc_enc_ctx;
 
 # include "fdct.h"
@@ -18,6 +18,22 @@
 
 #define OC_1_LN2 (1.4426950408889634073F)
 
+/*Encoding modes.*/
+#define OC_ENC_MODE_VBR (0)
+#define OC_ENC_MODE_CQI (1)
+/*
+Not yet implemented:
+#define OC_ENC_MODE_CBR (2)
+#define OC_ENC_MODE_RDO (3)
+*/
+
+/*The function used to set the speed for the current encoding mode.
+  _speed: The encoding speed to use.
+          Higher values should provide faster encoding, at reduced
+           rate-distortion performance.
+          This will always be in the range [0..._enc->speed_max].*/
+typedef void (*oc_enc_set_speed_func)(oc_enc_ctx *_enc,int _speed);
+
 /*Constants for the packet-out state machine specific to the encoder.*/
 
 /*Next packet to emit: Data packet, but none are ready yet.*/
@@ -27,6 +43,32 @@
 
 
 
+/*An encoder pipeline stage.*/
+struct oc_enc_pipe_stage{
+  /*The encoder this pipeline stage belongs to.*/
+  oc_enc_ctx        *enc;
+  /*The next stage in the pipeline.*/
+  oc_enc_pipe_stage *next;
+  /*The number of rows processed so far in each plane.*/
+  int                y_procd[3];
+  /*Called before processing the first stripe.
+    This does not need to call the next stage's start function.
+    Return: 0 on success, or a negative value on error.*/
+  int (*pipe_start)(oc_enc_pipe_stage *_stage);
+  /*Called for each stripe as it becomes available.
+    This function is responsible for calling the next function in the chain.
+    It may do so in smaller or larger stripes than are passed to it, at its
+     discretion.
+    _y_avail: Rows 0 through _y_avail[pli] in plane pli will be available for
+               processing.
+    Return: 0 on success, or a negative value on error.*/
+  int (*pipe_proc)(oc_enc_pipe_stage *_stage,int _y_avail[3]);
+  /*Called after processing the last stripe.
+    This does not need to call the next stage's end function.
+    Return: 0 on success, or a negative value on error.*/
+  int (*pipe_end)(oc_enc_pipe_stage *_stage);
+};
+
 /*Fragment information specific to encoding.*/
 struct oc_fragment_enc_info{
   /*The DCT coefficients for coding the fragment in intra mode.
@@ -107,6 +149,22 @@
 struct theora_enc_ctx{
   /*Shared encoder/decoder state.*/
   oc_theora_state          state;
+  /*The start of the encoder pipeline.*/
+  oc_enc_pipe_stage       *pipe;
+  /*The maximum speed setting for the current encoding mode.*/
+  int                      speed_max;
+  /*The function used to set the speed level for the current encoding mode.*/
+  oc_enc_set_speed_func    set_speed;
+  /*The INTRA fDCT pipe stage.*/
+  oc_enc_pipe_stage        fdct_pipe;
+  /*The uncoded fragment copying pipe stage.*/
+  oc_enc_pipe_stage        copy_pipe;
+  /*The loop filter pipe stage.*/
+  oc_enc_pipe_stage        loop_pipe;
+  /*The border filling pipe stage.*/
+  oc_enc_pipe_stage        fill_pipe;
+  /*The packet assembly pipe stage.*/
+  oc_enc_pipe_stage        pack_pipe;
   /*Whether or not packets are ready to be emitted.
     This takes on negative values while there are remaining header packets to
      be emitted, reaches 0 when the codec is ready for input, and goes to 1
@@ -123,36 +181,37 @@
   int                      nblock_coded_flags;
   /*Special buffer used for the coded fragment flags.*/
   oggpack_buffer           opb_coded_flags;
-  /*The estimated bit cost of the current frame.*/
-  int                      est_bits;
   /*Encoder-specific fragment information.*/
   oc_fragment_enc_info    *frinfo;
   /*Encoder-specific macro block information.*/
   oc_mb_enc_info          *mbinfo;
-  /*Minimum psychovisual tolerance for the DC coefficients in each plane.*/
-  unsigned                 dc_tol_mins[3];
+  /*Context information used to perform motion estimation.*/
+  oc_mcenc_ctx            *mcenc;
+  /*Context information used for VBR encoding.*/
+  oc_enc_vbr_ctx          *vbr;
   /*The qi value lists selected for each potential frame type.*/
   int                      qis[2][3];
   /*The number of qi values in the list for each frame type.*/
   int                      nqis[2];
+  /*The number of coded fragments.*/
+  int                      ncoded_frags;
+  /*The current uncoded_fragi index being copied to each plane.*/
+  int                      uncoded_fragii[3];
   /*The macro-block mode scheme chooser.*/
   oc_mode_scheme_chooser   mode_scheme_chooser;
   /*The motion vector scheme chosen.*/
   int                      mv_scheme;
-  /*Context information used to perform motion estimation.*/
-  oc_mcenc_ctx            *mcenc;
-  /*Context information used to generate the importance map.*/
-  oc_impmap_ctx           *impmap;
-  /*Context information used to generate low-level perceptual weightings.*/
-  oc_psych_ctx            *psych;
   /*The maximum distance between keyframes.*/
   ogg_uint32_t             keyframe_frequency_force;
+  /*Whether or not VP3-compatibility is enabled.*/
+  int                      vp3_compatible;
+  /*Whether or not the loop filter is enabled.
+    This is determined each frame, based on the quantizer it is encoded with.*/
+  int                      loop_filter_enabled;
+  /*The bounding value array used for the loop filter.*/
+  int                      bounding_values[512];
   /*The huffman tables in use.*/
   theora_huff_code         huff_codes[OC_NHUFFMAN_TABLES][OC_NDCT_TOKENS];
-  /*The scale factor for the current quality setting.*/
-  float                    qscale;
-  /*Whether or not VP3-compatibility is enabled.*/
-  int                      vp3_compatible;
   /*The quantization parameters in use.*/
   theora_quant_info        qinfo;
   /*Pointers to the quantization tables in use.*/
@@ -173,6 +232,28 @@
   ogg_uint16_t           **extra_bits;
 };
 
+extern const int OC_MODE_SCHEMES[7][OC_NMODES];
+extern const int OC_DCT_VAL_CAT_SIZES[6];
+extern const int OC_DCT_VAL_CAT_SHIFTS[6];
+extern const int OC_MODE_HAS_MV[OC_NMODES];
+extern const theora_huff_code OC_MV_CODES[2][63];
+
+/*The number of fractional bits in bitrate statistics.*/
+#define OC_BIT_SCALE (7)
+/*The number of fractional bits in distortion statistics.*/
+#define OC_DIS_SCALE (9)
+
+/*Estimated bits needed to code a residual given the: quality index, color
+   plane, macro-block mode, and a SAD bin.
+  SAD values for a block are divided by 256 for INTRA mode and 64 for INTER
+   modes to find the appropriate bin.*/
+extern ogg_uint16_t OC_RES_BITRATES[64][3][OC_NMODES][16];
+
+#if defined(OC_BITRATE_STATS)
+void oc_bitrate_update_stats(oc_enc_ctx *_enc,int _huff_idxs[5][3]);
+#endif
+
+
 int oc_sad8_fullpel(const unsigned char *_cur,int _cur_stride,
  const unsigned char *_ref,int _ref_stride);
 int oc_sad8_fullpel_border(const unsigned char *_cur,int _cur_stride,
@@ -183,34 +264,33 @@
  const unsigned char *_ref0,const unsigned char *_ref1,int _ref_stride,
  ogg_int64_t _mask);
 
-void oc_enc_frag_intra_fdct(oc_enc_ctx *_enc,const oc_fragment *_frag,
- ogg_int16_t _dct_vals[64],int _ystride,int _framei);
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser);
+void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser);
+int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,int _mode);
+void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mode);
+void oc_mode_scheme_chooser_add(oc_mode_scheme_chooser *_chooser,
+ int _mode_counts[OC_NMODES]);
 
 oc_mcenc_ctx *oc_mcenc_alloc(oc_enc_ctx *_enc);
 void oc_mcenc_free(oc_mcenc_ctx *_mcenc);
-void oc_mcenc_analyze(oc_mcenc_ctx *_mcenc);
 int oc_mcenc_search_1mv(oc_mcenc_ctx *_mcenc,int _mbi,int _frame);
+oc_enc_pipe_stage *oc_mcenc_prepend_to_pipe(oc_mcenc_ctx *_mcenc,
+ oc_enc_pipe_stage *_next);
 
-oc_impmap_ctx *oc_impmap_alloc(oc_enc_ctx *_enc);
-void oc_impmap_free(oc_impmap_ctx *_impmap);
-void oc_impmap_fill(oc_impmap_ctx *_impmap,float _duration);
+oc_enc_vbr_ctx *oc_enc_vbr_alloc(oc_enc_ctx *_enc);
+void oc_enc_vbr_free(oc_enc_vbr_ctx *_vbr);
+int oc_enc_vbr_enable(oc_enc_vbr_ctx *_vbr,theora_vbr_cfg *_cfg);
 
-oc_psych_ctx *oc_psych_alloc(oc_enc_ctx *_enc);
-void oc_psych_free(oc_psych_ctx *_psych);
-void oc_psych_scan(oc_psych_ctx *_psych,float _contrast);
+void oc_enc_set_speed_null(oc_enc_ctx *_enc,int _speed);
+void oc_enc_frag_intra_fdct(oc_enc_ctx *_enc,const oc_fragment *_frag,
+ ogg_int16_t _dct_vals[64],int _ystride,int _framei);
+int oc_enc_frag_sad(oc_enc_ctx *_enc,oc_fragment *_frag,int _dx,
+ int _dy,int _pli,int _frame);
+int oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb);
+int oc_enc_coded_sb_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb);
+int oc_enc_coded_block_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb);
+void oc_enc_do_inter_dcts(oc_enc_ctx *_enc);
+void oc_enc_merge_eob_runs(oc_enc_ctx *_enc);
 
-/*The number of fractional bits in bitrate statistics.*/
-#define OC_BIT_SCALE (7)
-
-/*Estimated bits needed to code a residual given the: quality index, color
-   plane, macro-block mode, and a SAD bin.
-  SAD values for a block are divided by 256 for INTRA mode and 64 for INTER
-   modes to find the appropriate bin.*/
-extern ogg_uint16_t OC_RES_BITRATES[64][3][OC_NMODES][16];
-
-#if defined(OC_BITRATE_STATS)
-void oc_bitrate_update_stats(oc_enc_ctx *_enc,int _huff_idxs[5][3]);
 #endif
-
-
-#endif

Added: experimental/derf/theora-exp/lib/encmsc.c
===================================================================
--- experimental/derf/theora-exp/lib/encmsc.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/encmsc.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -0,0 +1,234 @@
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ogg/ogg.h>
+#include "encint.h"
+
+
+
+/*The VLC code used for mode schemes 0-6.*/
+static const theora_huff_code OC_MODE_CODESA[OC_NMODES]={
+  {0x00,1},{0x02,2},{0x06,3},{0x0E,4},{0x1E,5},{0x3E,6},{0x7E,7},{0x7F,7}
+};
+
+/*The CLC code used for mode scheme 7.*/
+static const theora_huff_code OC_MODE_CODESB[OC_NMODES]={
+  {0x00,3},{0x01,3},{0x02,3},{0x03,3},{0x04,3},{0x05,3},{0x06,3},{0x07,3}
+};
+
+
+
+/*Initialize the mode scheme chooser.
+  This need only be called once per encoder.
+  This is probably the best place to describe the various schemes Theora uses
+   to encode macro block modes.
+  There are 8 possible schemes.
+  Schemes 0-6 use a highly unbalanced Huffman code to code each of the modes.
+  The same set of Huffman codes is used for each of these 7 schemes, but the
+   mode assigned to each code varies.
+  Schemes 1-6 have a fixed mapping from Huffman code to MB mode, while scheme
+   0 writes a custom mapping to the bitstream before all the modes.
+  Finally, scheme 7 just encodes each mode directly in 3 bits.
+  Be warned that the number assigned to each mode is slightly different in the
+   bitstream than in this implementation, so a translation needs to be done.
+
+  Mode name:                 Source-code index;  Bit-stream index:
+  OC_MODE_INTRA              0                   1
+  OC_MODE_INTER_NOMV         1                   0
+  OC_MODE_INTER_MV           2                   2
+  OC_MODE_INTER_MV_LAST      3                   3
+  OC_MODE_INTER_MV_LAST2     4                   4
+  OC_MODE_INTER_MV_FOUR      5                   6
+  OC_MODE_GOLDEN_NOMV        6                   7
+  OC_MODE_GOLDEN_MV          7                   5
+
+  The bit stream indices come from the constants assigned to each mode in the
+   original VP3 source.*/
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
+  int msi;
+  _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
+  for(msi=0;msi<7;msi++){
+    _chooser->mode_codes[msi]=OC_MODE_CODESA;
+    _chooser->mode_ranks[msi+1]=OC_MODE_SCHEMES[msi];
+  }
+  _chooser->mode_codes[7]=OC_MODE_CODESB;
+}
+
+/*Reset the mode scheme chooser.
+  This needs to be called once for each frame, including the first.*/
+void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
+  int i;
+  memset(_chooser->mode_counts,0,sizeof(_chooser->mode_counts));
+  /*Scheme 0 starts with 24 bits to store the mode list in.*/
+  _chooser->scheme_bits[0]=24;
+  memset(_chooser->scheme_bits+1,0,7*sizeof(_chooser->scheme_bits[1]));
+  for(i=0;i<8;i++){
+    /*Scheme 7 should always start first, and scheme 0 should always start
+       last.*/
+    _chooser->scheme_list[i]=7-i;
+    _chooser->scheme0_list[i]=_chooser->scheme0_ranks[i]=i;
+  }
+}
+
+/*This is the real purpose of this data structure: not actually selecting a
+   mode scheme, but estimating the cost of coding a given mode given all the
+   modes selected so far.
+  This is done via opportunity cost: the cost is defined as the number of bits
+   required to encode all the modes selected so far including the current one
+   using the best possible scheme, minus the number of bits required to encode
+   all the modes selected so far not including the current one using the best
+   possible scheme.
+  The computational expense of doing this probably makes it overkill.
+  Just be happy we take a greedy approach instead of trying to solve the
+   global mode-selection problem (which is NP-hard).
+  _mode: The mode to determine the cost of.
+  Return: The number of bits required to code this mode.*/
+int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,int _mode){
+  int scheme0;
+  int scheme1;
+  int si;
+  int scheme_bits;
+  int best_bits;
+  int mode_bits;
+  scheme0=_chooser->scheme_list[0];
+  scheme1=_chooser->scheme_list[1];
+  best_bits=_chooser->scheme_bits[scheme0];
+  mode_bits=_chooser->mode_codes[scheme0][
+   _chooser->mode_ranks[scheme0][_mode]].nbits;
+  /*Typical case: If the difference between the best scheme and the next best
+     is greater than 6 bits, then adding just one mode cannot change which
+     scheme we use.*/
+  if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits;
+  /*Otherwise, check to see if adding this mode selects a different scheme
+     as the best.*/
+  si=1;
+  best_bits+=mode_bits;
+  do{
+    scheme1=_chooser->scheme_list[si];
+    /*For any scheme except 0, we can just use the bit cost of the mode's rank
+       in that scheme.*/
+    if(scheme1!=0){
+      scheme_bits=_chooser->scheme_bits[scheme1]+
+       _chooser->mode_codes[scheme1][
+       _chooser->mode_ranks[scheme1][_mode]].nbits;
+    }
+    else{
+      int ri;
+      /*For scheme 0, incrementing the mode count could potentially change the
+         mode's rank.
+        Find the index where the mode would be moved to in the optimal list,
+         and use its bit cost instead of the one for the mode's current
+         position in the list.*/
+      for(ri=_chooser->scheme0_ranks[_mode];ri>0&&
+       _chooser->mode_counts[_mode]>=
+       _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--);
+      scheme_bits=_chooser->scheme_bits[0]+OC_MODE_CODESA[ri].nbits;
+    }
+    if(scheme_bits<best_bits)best_bits=scheme_bits;
+    si++;
+  }
+  while(si<8&&_chooser->scheme_bits[_chooser->scheme_list[si]]-
+   _chooser->scheme_bits[scheme0]<=6);
+  return best_bits-_chooser->scheme_bits[scheme0];
+}
+
+/*Update the mode counts and per-scheme bit counts and re-order the scheme
+   lists once a mode has been selected.
+  _mode: The mode that was chosen.*/
+void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mode){
+  int ri;
+  int si;
+  _chooser->mode_counts[_mode]++;
+  /*Re-order the scheme0 mode list if necessary.*/
+  for(ri=_chooser->scheme0_ranks[_mode];ri>0;ri--){
+    int pmode;
+    pmode=_chooser->scheme0_list[ri-1];
+    if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mode])break;
+    _chooser->scheme0_ranks[pmode]++;
+    _chooser->scheme0_list[ri]=pmode;
+  }
+  _chooser->scheme0_ranks[_mode]=ri;
+  _chooser->scheme0_list[ri]=_mode;
+  /*Now add the bit cost for the mode to each scheme.*/
+  for(si=0;si<8;si++){
+    _chooser->scheme_bits[si]+=
+     _chooser->mode_codes[si][_chooser->mode_ranks[si][_mode]].nbits;
+  }
+  /*Finally, re-order the list of schemes.*/
+  for(si=1;si<8;si++){
+    int sj;
+    int scheme0;
+    int bits0;
+    scheme0=_chooser->scheme_list[si];
+    bits0=_chooser->scheme_bits[scheme0];
+    sj=si;
+    do{
+      int scheme1;
+      scheme1=_chooser->scheme_list[sj-1];
+      if(bits0>=_chooser->scheme_bits[scheme1])break;
+      _chooser->scheme_list[sj]=scheme1;
+    }
+    while(--sj>0);
+    _chooser->scheme_list[sj]=scheme0;
+  }
+}
+
+/*Update the count for each mode by the given amounts, and then re-rank the
+   schemes appropriately.
+  This allows fewer (e.g. 1) updates to be done, at the cost of a more
+   expensive update.
+  _mode_counts: The amount to add to each mode count.*/
+void oc_mode_scheme_chooser_add(oc_mode_scheme_chooser *_chooser,
+ int _mode_counts[OC_NMODES]){
+  int mi;
+  int mj;
+  int ri;
+  int rj;
+  int si;
+  for(mi=0;mi<OC_NMODES;mi++){
+    _chooser->mode_counts[mi]+=_mode_counts[mi];
+  }
+  /*Re-order the scheme0 mode list if necessary.*/
+  for(ri=1;ri<OC_NMODES;ri++){
+    mi=_chooser->scheme0_list[ri];
+    rj=ri;
+    do{
+      mj=_chooser->scheme0_list[rj-1];
+      if(_chooser->mode_counts[mj]>=_chooser->mode_counts[mi])break;
+      _chooser->scheme0_ranks[mj]++;
+      _chooser->scheme0_list[rj]=mj;
+    }
+    while(--rj>0);
+    _chooser->scheme0_ranks[mi]=rj;
+    _chooser->scheme0_list[rj]=mi;
+  }
+  /*Now recompute the bit cost for each scheme.*/
+  for(si=0;si<8;si++){
+    _chooser->scheme_bits[si]=0;
+    for(mi=0;mi<8;mi++){
+      _chooser->scheme_bits[si]+=
+       _chooser->mode_codes[si][_chooser->mode_ranks[si][mi]].nbits*
+        _chooser->mode_counts[mi];
+    }
+  }
+  /*Scheme 0 starts with 24 bits to store the mode list in.*/
+  _chooser->scheme_bits[0]+=24;
+  /*Finally, re-order the list of schemes.*/
+  for(si=1;si<8;si++){
+    int sj;
+    int scheme0;
+    int bits0;
+    scheme0=_chooser->scheme_list[si];
+    bits0=_chooser->scheme_bits[scheme0];
+    sj=si;
+    do{
+      int scheme1;
+      scheme1=_chooser->scheme_list[sj-1];
+      if(bits0>=_chooser->scheme_bits[scheme1])break;
+      _chooser->scheme_list[sj]=scheme1;
+    }
+    while(--sj>0);
+    _chooser->scheme_list[sj]=scheme0;
+  }
+}

Modified: experimental/derf/theora-exp/lib/encode.c
===================================================================
--- experimental/derf/theora-exp/lib/encode.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/encode.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -9,7 +9,7 @@
 
 /*The mode orderings for the various mode coding schemes.
   Scheme 0 uses a custom alphabet, which is not stored in this table.*/
-static const int OC_MODE_SCHEMES[7][OC_NMODES]={
+const int OC_MODE_SCHEMES[7][OC_NMODES]={
   /*Last MV dominates.*/
   /*L P M N I G GM 4*/
   {4,3,2,0,1,7,5,6},
@@ -29,12 +29,20 @@
   {1,0,2,3,4,7,5,6}
 };
 
+/*The number of different DCT coefficient values that can be stored by each
+   of the different DCT value category tokens.*/
+const int OC_DCT_VAL_CAT_SIZES[6]={2,4,8,16,32,512};
+
+/*The number of bits to shift the sign of the DCT coefficient over by for each
+   of the different DCT value category tokens.*/
+const int OC_DCT_VAL_CAT_SHIFTS[6]={1,2,3,4,5,9};
+
 /*Whether or not each mode has a motion vector associated with it.
   Otherwise, the mode is assumed to use the 0,0 vector.*/
-static const int OC_MODE_HAS_MV[OC_NMODES]={0,0,1,1,1,1,0,1};
+const int OC_MODE_HAS_MV[OC_NMODES]={0,0,1,1,1,1,0,1};
 
 /*The Huffman codes used for motion vectors.*/
-static const theora_huff_code OC_MV_CODES[2][63]={
+const theora_huff_code OC_MV_CODES[2][63]={
   /*Scheme 1: VLC code.*/
   {
              {0xFF,8},{0xFD,8},{0xFB,8},{0xF9,8},{0xF7,8},{0xF5,8},{0xF3,8},
@@ -65,22 +73,6 @@
 
 
 
-static int oc_mvbitsa(int _dx,int _dy){
-  return OC_MV_CODES[0][_dx+31].nbits+OC_MV_CODES[0][_dy+31].nbits;
-}
-
-
-
-static const theora_huff_code OC_MODE_CODESA[OC_NMODES]={
-  {0x00,1},{0x02,2},{0x06,3},{0x0E,4},{0x1E,5},{0x3E,6},{0x7E,7},{0x7F,7}
-};
-
-static const theora_huff_code OC_MODE_CODESB[OC_NMODES]={
-  {0x00,3},{0x01,3},{0x02,3},{0x03,3},{0x04,3},{0x05,3},{0x06,3},{0x07,3}
-};
-
-
-
 int oc_sad8_fullpel(const unsigned char *_cur,int _cur_ystride,
  const unsigned char *_ref,int _ref_ystride){
   int i;
@@ -211,165 +203,6 @@
 
 
 
-/*Initialize the mode scheme chooser.
-  This need only be called once per encoder.
-  This is probably the best place to describe the various scheme's Theora uses
-   to encode macro block modes.
-  There are 8 possible schemes.
-  Schemes 0-6 use a highly unbalanced Huffman code to code each of the modes.
-  The same set of Huffman codes is used for each of these 7 schemes, but the
-   mode assigned to each code varies.
-  Schemes 1-6 have a fixed mapping from Huffman code to MB mode, while scheme
-   0 writes a custom mapping to the bitstream before all the modes.
-  Finally, scheme 7 just encodes each mode directly in 3 bits.
-  Be warned that the number assigned to each mode is slightly different in the
-   bitstream than in this implementation, so a translation needs to be done.
-
-  Mode name:                 Source-code index;  Bit-stream index:
-  OC_MODE_INTRA              0                   1
-  OC_MODE_INTER_NOMV         1                   0
-  OC_MODE_INTER_MV           2                   2
-  OC_MODE_INTER_MV_LAST      3                   3
-  OC_MODE_INTER_MV_LAST2     4                   4
-  OC_MODE_INTER_MV_FOUR      5                   6
-  OC_MODE_GOLDEN_NOMV        6                   7
-  OC_MODE_GOLDEN_MV          7                   5
-
-  The bit stream indices come from the constants assigned to each mode in the
-   original VP3 source.*/
-static void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
-  int msi;
-  _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
-  for(msi=0;msi<7;msi++){
-    _chooser->mode_codes[msi]=OC_MODE_CODESA;
-    _chooser->mode_ranks[msi+1]=OC_MODE_SCHEMES[msi];
-  }
-  _chooser->mode_codes[7]=OC_MODE_CODESB;
-}
-
-/*Reset the mode scheme chooser.
-  This needs to be called once for each frame, including the first.*/
-static void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
-  int i;
-  memset(_chooser->mode_counts,0,sizeof(_chooser->mode_counts));
-  /*Scheme 0 starts with 24 bits to store the mode list in.*/
-  _chooser->scheme_bits[0]=24;
-  memset(_chooser->scheme_bits+1,0,7*sizeof(_chooser->scheme_bits[1]));
-  for(i=0;i<8;i++){
-    /*Scheme 7 should always start first, and scheme 0 should always start
-       last.*/
-    _chooser->scheme_list[i]=7-i;
-    _chooser->scheme0_list[i]=_chooser->scheme0_ranks[i]=i;
-  }
-}
-
-/*This is the real purpose of this data structure: not actually selecting a
-   mode scheme, but estimating the cost of coding a given mode given all the
-   modes selected so far.
-  This is done via opportunity cost: the cost is defined as the number of bits
-   required to encode all the modes selected so far including the current one
-   using the best possible scheme, minus the number of bits required to encode
-   all the modes selected so far not including the current one using the best
-   possible scheme.
-  The computational expense of doing this probably makes it overkill.
-  Just be happy we take a greedy approach instead of trying to solve the
-   global mode-selection problem (which is NP-hard).
-  _mode: The mode to determine the cost of.
-  Return: The number of bits required to code this mode.*/
-static int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,
- int _mode){
-  int scheme0;
-  int scheme1;
-  int si;
-  int scheme_bits;
-  int best_bits;
-  int mode_bits;
-  scheme0=_chooser->scheme_list[0];
-  scheme1=_chooser->scheme_list[1];
-  best_bits=_chooser->scheme_bits[scheme0];
-  mode_bits=_chooser->mode_codes[scheme0][
-   _chooser->mode_ranks[scheme0][_mode]].nbits;
-  /*Typical case: If the difference between the best scheme and the next best
-     is greater than 6 bits, then adding just one mode cannot change which
-     scheme we use.*/
-  if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits;
-  /*Otherwise, check to see if adding this mode selects a different scheme
-     as the best.*/
-  si=1;
-  best_bits+=mode_bits;
-  do{
-    scheme1=_chooser->scheme_list[si];
-    /*For any scheme except 0, we can just use the bit cost of the mode's rank
-       in that scheme.*/
-    if(scheme1!=0){
-      scheme_bits=_chooser->scheme_bits[scheme1]+
-       _chooser->mode_codes[scheme1][
-       _chooser->mode_ranks[scheme1][_mode]].nbits;
-    }
-    else{
-      int ri;
-      /*For scheme 0, incrementing the mode count could potentially change the
-         mode's rank.
-        Find the index where the mode would be moved to in the optimal list,
-         and use its bit cost instead of the one for the mode's current
-         position in the list.*/
-      for(ri=_chooser->scheme0_ranks[_mode];ri>0&&
-       _chooser->mode_counts[_mode]>=
-       _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--);
-      scheme_bits=_chooser->scheme_bits[0]+OC_MODE_CODESA[ri].nbits;
-    }
-    if(scheme_bits<best_bits)best_bits=scheme_bits;
-    si++;
-  }
-  while(si<8&&_chooser->scheme_bits[_chooser->scheme_list[si]]-
-   _chooser->scheme_bits[scheme0]<=6);
-  return best_bits-_chooser->scheme_bits[scheme0];
-}
-
-/*Update the mode counts and per-scheme bit counts and re-order the scheme
-   lists once a mode has been selected.
-  _mode: The mode that was chosen.*/
-static void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
- int _mode){
-  int ri;
-  int si;
-  _chooser->mode_counts[_mode]++;
-  /*Re-order the scheme0 mode list if necessary.*/
-  for(ri=_chooser->scheme0_ranks[_mode];ri>0;ri--){
-    int pmode;
-    pmode=_chooser->scheme0_list[ri-1];
-    if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mode])break;
-    _chooser->scheme0_ranks[pmode]++;
-    _chooser->scheme0_list[ri]=pmode;
-  }
-  _chooser->scheme0_ranks[_mode]=ri;
-  _chooser->scheme0_list[ri]=_mode;
-  /*Now add the bit cost for the mode to each scheme.*/
-  for(si=0;si<8;si++){
-    _chooser->scheme_bits[si]+=
-     _chooser->mode_codes[si][_chooser->mode_ranks[si][_mode]].nbits;
-  }
-  /*Finally, re-order the list of schemes.*/
-  for(si=1;si<8;si++){
-    int sj;
-    int scheme0;
-    int bits0;
-    scheme0=_chooser->scheme_list[si];
-    bits0=_chooser->scheme_bits[scheme0];
-    sj=si;
-    do{
-      int scheme1;
-      scheme1=_chooser->scheme_list[sj-1];
-      if(bits0>=_chooser->scheme_bits[scheme1])break;
-      _chooser->scheme_list[sj]=scheme1;
-    }
-    while(--sj>0);
-    _chooser->scheme_list[sj]=scheme0;
-  }
-}
-
-
-
 /*Initializes the macro block neighbor lists.
   This assumes that the entire mbinfo memory region has been initialized with
    zeros.
@@ -527,144 +360,6 @@
   return 0;
 }
 
-/*Select the set of quantizers to use for the current frame for each possible
-   frame type (intra or inter).
-  This does not assign a quantizer to each fragment, as that depends on the
-   quantizer type used and thus is done during mode decision.*/
-static void oc_enc_quant_sel_quality(oc_enc_ctx *_enc,int _intra_only){
-  unsigned              qmax[2][3];
-  int                   fti;
-  int                   qti;
-  int                   pli;
-  int                   dc_qi[2];
-  /*The first quantizer value is used for DC coefficients.
-    Select one that allows us to meet our quality requirements.*/
-  for(qti=0;qti<1+!_intra_only;qti++)for(pli=0;pli<3;pli++){
-    qmax[qti][pli]=OC_MAXI(2U*_enc->dc_tol_mins[pli],OC_DC_QUANT_MIN[qti]);
-  }
-  /*For intra frames...(containing just INTRA fragments)*/
-  for(dc_qi[0]=0;dc_qi[0]<63;dc_qi[0]++){
-    if(_enc->state.dequant_tables[0][0][dc_qi[0]][0]<=qmax[0][0]&&
-     _enc->state.dequant_tables[0][1][dc_qi[0]][0]<=qmax[0][1]&&
-     _enc->state.dequant_tables[0][2][dc_qi[0]][0]<=qmax[0][2]){
-      break;
-    }
-  }
-  /*For inter frames...(containing both INTER and INTRA fragments)*/
-  if(!_intra_only){
-    for(dc_qi[1]=dc_qi[0];dc_qi[1]<63;dc_qi[1]++){
-      if(_enc->state.dequant_tables[1][0][dc_qi[1]][0]<=qmax[1][0]&&
-       _enc->state.dequant_tables[1][1][dc_qi[1]][0]<=qmax[1][1]&&
-       _enc->state.dequant_tables[1][2][dc_qi[1]][0]<=qmax[1][2]){
-        break;
-      }
-    }
-  }
-  /*Now we select a full qi list for each frame type.*/
-  for(fti=0;fti<1+!_intra_only;fti++){
-    oc_fragment_enc_info *efrag;
-    int                   ncoded_fragis;
-    int                   nqis[64];
-    int                   qi;
-    int                   qi0;
-    int                   qi1;
-    int                   qi2;
-    /*Here we count up the number of fragments that can use each qi value.
-      Unless we know this is an intra frame, we don't know what quantizer type
-       will be used for each fragment, so we just count both of them.*/
-    memset(nqis,0,sizeof(nqis));
-    if(fti){
-      int *coded_fragi;
-      int *coded_fragi_end;
-      coded_fragi=_enc->state.coded_fragis;
-      ncoded_fragis=_enc->state.ncoded_fragis[0]+
-       _enc->state.ncoded_fragis[1]+_enc->state.ncoded_fragis[2];
-      coded_fragi_end=coded_fragi+ncoded_fragis;
-      for(;coded_fragi<coded_fragi_end;coded_fragi++){
-        efrag=_enc->frinfo+*coded_fragi;
-        for(qti=0;qti<2;qti++)nqis[efrag->qi_min[qti]]++;
-      }
-    }
-    else{
-      oc_fragment_enc_info *efrag_end;
-      ncoded_fragis=_enc->state.nfrags;
-      efrag=_enc->frinfo;
-      efrag_end=efrag+ncoded_fragis;
-      for(;efrag<efrag_end;efrag++)nqis[efrag->qi_min[0]]++;
-    }
-    /*We'll now choose the qi values that divide the fragments into equally
-       sized groups, or as close as we can make it.
-      We account for the DC coefficients by adding an extra amount to the qi
-       value they require.
-      Since there are usually many more DC coefficients coded than any one AC
-       coefficient, we use 1/8 of the number of fragments, instead of 1/64.*/
-    nqis[dc_qi[fti]]+=(ncoded_fragis<<fti)+7>>3;
-    /*Convert this into a moment table.*/
-    for(qi=63;qi-->0;)nqis[qi]+=nqis[qi+1];
-    for(qi0=64;qi0-->0&&nqis[qi0]<=0;);
-    for(qi1=qi0-1;qi1>=0&&nqis[qi1]<=nqis[qi0];qi1--);
-    /*Test to make sure there are even two unique quantizers.*/
-    if(qi1>=0){
-      ogg_int64_t best_metric;
-      ogg_int64_t metric;
-      int         best_qi1;
-      int         best_qi2;
-      int         qii;
-      for(qi2=qi1-1;qi2>=0&&nqis[qi2]<=nqis[qi1];qi2--);
-      /*Test to make sure there are three unique quantizers.*/
-      if(qi2>=0){
-        best_metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
-         (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
-        best_qi1=qi1;
-        best_qi2=qi2;
-        for(;nqis[qi1]<nqis[1];qi1--){
-          for(qi2=qi1-1;nqis[qi2]<nqis[0];qi2--){
-            metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
-             (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
-            if(metric>=best_metric){
-              best_qi1=qi1;
-              best_qi2=qi2;
-              best_metric=metric;
-            }
-          }
-        }
-        _enc->qis[fti][0]=qi0;
-        _enc->qis[fti][1]=best_qi1;
-        _enc->qis[fti][2]=best_qi2;
-        _enc->nqis[fti]=3;
-      }
-      else{
-        best_metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
-        best_qi1=qi1;
-        if(qi1>0)for(qi1--;nqis[qi1]<nqis[0];qi1--){
-          metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
-          if(metric>best_metric){
-            best_qi1=qi1;
-            best_metric=metric;
-          }
-        }
-        _enc->qis[fti][0]=qi0;
-        _enc->qis[fti][1]=best_qi1;
-        _enc->nqis[fti]=2;
-      }
-      /*Right now qis[0] is the largest.
-        We want to use the smallest that is still large enough for our DC
-         coefficients.*/
-      for(qii=1;qii<_enc->nqis[fti];qii++)if(_enc->qis[fti][qii]>=dc_qi[fti]){
-        qi0=_enc->qis[fti][0];
-        _enc->qis[fti][0]=_enc->qis[fti][qii];
-        _enc->qis[fti][qii]=qi0;
-      }
-    }
-    else{
-      _enc->qis[fti][0]=qi0;
-      _enc->nqis[fti]=1;
-    }
-    /*If we're in VP3 compatibility mode, just use the first quantizer.*/
-    if(_enc->vp3_compatible)_enc->nqis[fti]=1;
-  }
-}
-
 static void oc_enc_frame_header_pack(oc_enc_ctx *_enc){
   /*Mark this packet as a data packet.*/
   oggpackB_write(&_enc->opb,0,1);
@@ -690,78 +385,6 @@
   }
 }
 
-/*Mark all fragments as coded and in OC_MODE_INTRA.
-  This also selects a quantizer value for each fragment and builds up the
-   coded fragment list (in coded order) and clears the uncoded fragment list.
-  It does not update the coded macro block list, as that is not used when
-   coding INTRA frames.*/
-static void oc_enc_mark_all_intra(oc_enc_ctx *_enc){
-  oc_sb *sb;
-  oc_sb *sb_end;
-  int    pli;
-  int    qii;
-  int    ncoded_fragis;
-  int    prev_ncoded_fragis;
-  /*Select the quantizer list for INTRA frames.*/
-  _enc->state.nqis=_enc->nqis[OC_INTRA_FRAME];
-  for(qii=0;qii<_enc->state.nqis;qii++){
-    _enc->state.qis[qii]=_enc->qis[OC_INTRA_FRAME][qii];
-  }
-  prev_ncoded_fragis=ncoded_fragis=0;
-  sb=sb_end=_enc->state.sbs;
-  for(pli=0;pli<3;pli++){
-    const oc_fragment_plane *fplane;
-    fplane=_enc->state.fplanes+pli;
-    sb_end+=fplane->nsbs;
-    for(;sb<sb_end;sb++){
-      int quadi;
-      for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
-        int bi;
-        for(bi=0;bi<4;bi++)if(sb->map[quadi][bi]>=0){
-          oc_fragment_enc_info *efrag;
-          oc_fragment          *frag;
-          int                   fragi;
-          int                   best_qii;
-          fragi=sb->map[quadi][bi];
-          frag=_enc->state.frags+fragi;
-          frag->coded=1;
-          frag->mbmode=OC_MODE_INTRA;
-          efrag=_enc->frinfo+fragi;
-          best_qii=0;
-          for(qii=1;qii<_enc->state.nqis;qii++){
-            if(efrag->qi_min[0]<=_enc->state.qis[qii]&&
-             (_enc->state.qis[best_qii]<efrag->qi_min[0]||
-             _enc->state.qis[qii]<_enc->state.qis[best_qii])){
-              best_qii=qii;
-            }
-          }
-          efrag->qii=(unsigned char)best_qii;
-          frag->qi=_enc->state.qis[best_qii];
-          _enc->state.coded_fragis[ncoded_fragis++]=fragi;
-#if defined(OC_BITRATE_STATS)
-          /*Compute the error function used for intra mode fragments.
-            This function can only use information known at mode decision time, and
-             so excludes the DC component.
-            TODO: Separate this out somewhere more useful.*/
-          {
-            oc_fragment_enc_info *efrag;
-            int                   ci;
-            int                   eerror;
-            efrag=_enc->frinfo+fragi;
-            eerror=0;
-            for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
-            efrag->eerror=eerror;
-          }
-#endif
-        }
-      }
-    }
-    _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
-    prev_ncoded_fragis=ncoded_fragis;
-    _enc->state.nuncoded_fragis[pli]=0;
-  }
-}
-
 static void oc_enc_block_qis_pack(oc_enc_ctx *_enc){
   int *coded_fragi;
   int *coded_fragi_end;
@@ -823,67 +446,6 @@
 /*Performs an fDCT on a given fragment.
   _frag:     The fragment to perform the 2D DCT on.
   _dct_vals: The output buffer for the DCT coefficients.
-  _ystride:  The Y stride of the plane the fragment belongs to.
-  _framei:   The picture buffer index to perform the DCT on.
-             Use OC_FRAME_IO for the current input frame.*/
-void oc_enc_frag_intra_fdct(oc_enc_ctx *_enc,const oc_fragment *_frag,
- ogg_int16_t _dct_vals[64],int _ystride,int _framei){
-  ogg_int16_t    pix_buf[64];
-  unsigned char *pixels;
-  int            pixi;
-  int            y;
-  int            x;
-  /*NOTE: 128 is subtracted from each pixel value to make it signed.
-    The original VP3 source claimed that, "this reduces the internal precision
-     requirments [sic] in the DCT transform."
-    This is of course not actually true.
-    The transform must still support input in the range [-255,255] to code
-     predicted fragments, since the same transform is used for both.
-    This actually _reduces_ the precision of the results, because larger
-     (absolute) values would have fewer significant bits chopped off when
-     rounding.
-    We're stuck with it, however.
-    At least it might reduce bias towards 0 when coding unpredicted DC
-     coefficients, but that's not what VP3 justified it with.*/
-  pixels=_frag->buffer[_framei];
-  /*For border fragments, only copy pixels that are in the displayable
-     region of the image.
-    The DCT function will compute optimal padding values for the other
-     pixels.*/
-  if(_frag->border!=NULL){
-    ogg_int64_t mask;
-    mask=_frag->border->mask;
-    for(pixi=y=0;y<8;y++){
-      for(x=0;x<8;x++,pixi++){
-        pix_buf[pixi]=(ogg_int16_t)(((int)mask&1)?pixels[x]-128:0);
-        /*This branchless code is (almost) equivalent to the previous line:
-        int pmask;
-        pmask=-(int)mask&1;
-        pix_buf[pixi]=(ogg_int16_t)(pmask&pixels[x]);
-        We don't use this code to allow the user to pass in a buffer that is
-         the exact size of the displayed image, not the size padded to a
-         multiple of 16.
-        In the latter case, we might segfault on pixels[x] if it is not mapped
-         to a valid page.*/
-        mask>>=1;
-      }
-      pixels+=_ystride;
-    }
-    oc_fdct8x8_border(_frag->border,_dct_vals,pix_buf);
-  }
-  /*Otherwise, copy all the pixels in the fragment and do a normal DCT.*/
-  else{
-    for(pixi=y=0;y<8;y++){
-      for(x=0;x<8;x++,pixi++)pix_buf[pixi]=(ogg_int16_t)(pixels[x]-128);
-      pixels+=_ystride;
-    }
-    oc_fdct8x8(_dct_vals,pix_buf);
-  }
-}
-
-/*Performs an fDCT on a given fragment.
-  _frag:     The fragment to perform the 2D DCT on.
-  _dct_vals: The output buffer for the DCT coefficients.
   _pli:      The color plane the fragment belongs to.*/
 static void oc_enc_frag_inter_fdct(oc_enc_ctx *_enc,const oc_fragment *_frag,
  ogg_int16_t _dct_vals[64],int _pli){
@@ -962,466 +524,13 @@
   }
 }
 
-/*Computes the SAD value of a fragment in the input image with respect to its
-   motion compensated predictor..
-  _frag:     The fragment to find the SAD of.
-  _dx:       The X component of the motion vector.
-  _dy:       The Y component of the motion vector.
-  _pli:      The color plane the fragment belongs to.
-  _frame:    The reference frame to predict from.*/
-static int oc_enc_frag_sad(oc_enc_ctx *_enc,oc_fragment *_frag,int _dx,
- int _dy,int _pli,int _frame){
-  int cur_ystride;
-  int ref_ystride;
-  int ref_framei;
-  int mvoffset0;
-  int mvoffset1;
-  cur_ystride=_enc->state.input[_pli].ystride;
-  ref_framei=_enc->state.ref_frame_idx[_frame];
-  ref_ystride=_enc->state.ref_frame_bufs[ref_framei][_pli].ystride;
-  if(oc_state_get_mv_offsets(&_enc->state,&mvoffset0,&mvoffset1,_dx,_dy,
-   ref_ystride,_pli)>1){
-    if(_frag->border==NULL){
-      return oc_sad8_halfpel(_frag->buffer[OC_FRAME_IO],cur_ystride,
-       _frag->buffer[ref_framei]+mvoffset0,
-       _frag->buffer[ref_framei]+mvoffset1,ref_ystride);
-    }
-    else{
-      return oc_sad8_halfpel_border(_frag->buffer[OC_FRAME_IO],cur_ystride,
-       _frag->buffer[ref_framei]+mvoffset0,
-       _frag->buffer[ref_framei]+mvoffset1,ref_ystride,_frag->border->mask);
-    }
-  }
-  else{
-    if(_frag->border==NULL){
-      return oc_sad8_fullpel(_frag->buffer[OC_FRAME_IO],cur_ystride,
-       _frag->buffer[ref_framei]+mvoffset0,ref_ystride);
-    }
-    else{
-      return oc_sad8_fullpel_border(_frag->buffer[OC_FRAME_IO],
-       cur_ystride,_frag->buffer[ref_framei]+mvoffset0,ref_ystride,
-       _frag->border->mask);
-    }
-  }
-}
-
-
-
-/*The number of different DCT coefficient values that can be stored by each
-   of the different DCT value category tokens.*/
-static const int OC_DCT_VAL_CAT_SIZES[6]={2,4,8,16,32,512};
-/*The number of bits to shift the sign of the DCT coefficient over by for each
-   of the different DCT value category tokens.*/
-static const int OC_DCT_VAL_CAT_SHIFTS[6]={1,2,3,4,5,9};
-
-
-
-/*Quantize and predict the DC coefficients.
-  This is done in a separate step because the prediction of DC coefficients
-   occurs in image order, not in the Hilbert-curve order, unlike the rest of
-   the encoding process.*/
-static void oc_enc_quant_dc(oc_enc_ctx *_enc){
-  oc_fragment_enc_info *efrag;
-  oc_fragment          *frag;
-  int                   pli;
-  frag=_enc->state.frags;
-  efrag=_enc->frinfo;
-  for(pli=0;pli<3;pli++){
-    oc_fragment_plane *fplane;
-    unsigned           fquant;
-    unsigned           iquant;
-    int                pred_last[3];
-    int                fragx;
-    int                fragy;
-    pred_last[OC_FRAME_GOLD]=0;
-    pred_last[OC_FRAME_PREV]=0;
-    pred_last[OC_FRAME_SELF]=0;
-    fplane=_enc->state.fplanes+pli;
-    for(fragy=0;fragy<fplane->nvfrags;fragy++){
-      for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++,efrag++){
-        int qc_pred;
-        int qc;
-        if(!frag->coded)continue;
-        qc_pred=oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
-        /*Fragments outside the displayable region must still be coded in key
-           frames.
-          To minimize wasted bits, just use the predicted DC value.
-          TODO: We might do a better job in the lower-left hand corner by
-           propagating over the DC value of the first actually coded fragment,
-           but for the moment this is not done.*/
-        if(frag->invalid)qc=0;
-        else{
-          int c;
-          int c_abs;
-          int qti;
-          /*We now center the DC coefficient range around the predicted value
-             and perform token bits optimization based on the HVS-determined
-             tolerance range.
-            For more details, see oc_enc_frag_quant_tokenize().*/
-          qti=frag->mbmode!=OC_MODE_INTRA;
-          iquant=_enc->state.dequant_tables[qti][pli][_enc->state.qis[0]][0];
-          c=efrag->dct_coeffs[0]-qc_pred*iquant;
-          c_abs=abs(c);
-          if(c_abs<=efrag->tols[0])qc=0;
-          else{
-            int qc_signed[2];
-            int qc_max;
-            int qc_min;
-            int qc_offs;
-            int c_sign;
-            int c_min;
-            int c_recon;
-            int cati;
-            fquant=_enc->enquant_tables[qti][pli][_enc->state.qis[0]][0];
-            qc_max=(ogg_int32_t)c_abs*fquant+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
-            c_sign=c<0;
-            c_recon=(qc_max-1)*iquant;
-            c_min=OC_MAXI(0,c_abs-efrag->tols[0]);
-            for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=iquant;
-            if(qc_min<3+OC_NDCT_VAL_CAT2_SIZE)qc=qc_min;
-            else{
-              qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
-              for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
-               cati++){
-                qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
-              }
-              qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
-            }
-            qc_signed[0]=qc;
-            qc_signed[1]=-qc;
-            qc=qc_signed[c_sign];
-          }
-        }
-        pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc=qc+qc_pred;
-        efrag->dct_coeffs[0]=(ogg_int16_t)qc;
-      }
-    }
-  }
-}
-
-/*Quantize and tokenize the given fragment.
-  _efrag:  The encoder information for the fragment to quantize.
-  _fquant: The forward quantization matrix to use.
-  _iquant: The inverse quantization matrix to use.*/
-static int oc_enc_frag_quant_tokenize(oc_enc_ctx *_enc,
- oc_fragment_enc_info *_efrag,const ogg_uint16_t _fquant[64],
- const ogg_uint16_t _iquant[64]){
-  int zzi;
-  int zrun;
-  int qc;
-  int qc_offs;
-  int c_sign;
-  int cati;
-  int tli;
-  /*The DC coefficient is already quantized (it had to be for DC prediction).
-    Here we just tokenize it.*/
-  if(_efrag->dct_coeffs[0]){
-    qc=abs(_efrag->dct_coeffs[0]);
-    c_sign=_efrag->dct_coeffs[0]<0;
-    switch(qc){
-      case 1:{
-        _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
-         (unsigned char)(OC_ONE_TOKEN+c_sign);
-      }break;
-      case 2:{
-        _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
-         (unsigned char)(OC_TWO_TOKEN+c_sign);
-      }break;
-      default:{
-        if(qc-3<OC_NDCT_VAL_CAT2_SIZE){
-          _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
-           (unsigned char)(OC_DCT_VAL_CAT2+qc-3);
-          _enc->extra_bits[0][_enc->nextra_bits[0]++]=(ogg_uint16_t)c_sign;
-        }
-        else{
-          qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
-          for(cati=0;qc>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];cati++){
-            qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
-          }
-          _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
-           (unsigned char)(OC_DCT_VAL_CAT3+cati);
-          _enc->extra_bits[0][_enc->nextra_bits[0]++]=
-           (ogg_uint16_t)((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
-        }
-      }
-    }
-    zrun=0;
-  }
-  else zrun=1;
-  /*Now we quantize and tokenize each AC coefficient.*/
-  for(zzi=1;zzi<64;zzi++){
-    int qc_signed[2];
-    int qc_max;
-    int qc_min;
-    int c_sign;
-    int c_abs;
-    int c_min;
-    int c_recon;
-    int ci;
-    ci=OC_FZIG_ZAG[zzi];
-    c_abs=abs(_efrag->dct_coeffs[ci]);
-    /*Best case: we can encode this as a zero.*/
-    if(c_abs<=_efrag->tols[ci]){
-      zrun++;
-      _efrag->dct_coeffs[ci]=0;
-    }
-    else{
-      c_sign=_efrag->dct_coeffs[ci]<0;
-      /*qc_max is the most accurate quantized value.
-        This is the largest possible (absolute) value we will use.*/
-      qc_max=(ogg_int32_t)c_abs*_fquant[ci]+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
-      /*qc_min is the smallest possible (by absolute value) quantized value
-         whose dequantized value is within the HVS-determined tolerance
-         range.*/
-      /*TODO: qc_min could be computed by a division (we do not want to allow
-         the rounding errors that are possible with the mul+shift quantization
-         used for qc_max), which would allow qc_max to be calculated only if
-         needed below.
-        Is this faster?
-        Who knows.*/
-      c_recon=(qc_max-1)*_iquant[ci];
-      c_min=c_abs-_efrag->tols[ci];
-      for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=_iquant[ci];
-      /*We now proceed to find a token that is as close to qc_max as possible,
-         but does not use any more bits than would be required for qc_min.
-        The general assumption we make is that encoding a value closer to 0
-         always uses fewer bits.
-        qc_min can still reach 0 here despite the test above, if the quantizer
-         value is larger than the tolerance (which can happen for very small
-         tolerances; the quantizer value has a minimum it cannot go below).*/
-      if(qc_min==0){
-        zrun++;
-        _efrag->dct_coeffs[ci]=0;
-      }
-      else{
-        /*If we have an outstanding zero run, code it now.*/
-        if(zrun>0){
-          /*The zero run tokens appear on the list for the first zero in the
-             run.*/
-          tli=zzi-zrun;
-          /*Second assumption: coding a combined run/value token always uses
-             fewer bits than coding them separately.*/
-          /*CAT1 run/value tokens: the value is 1.*/
-          if(qc_min==1&&zrun<=17){
-            if(zrun<=5){
-              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
-               (unsigned char)(OC_DCT_RUN_CAT1A+(zrun-1));
-              _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
-               (ogg_uint16_t)c_sign;
-            }
-            else if(zrun<=9){
-              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
-               OC_DCT_RUN_CAT1B;
-              _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
-               (ogg_uint16_t)((c_sign<<2)+zrun-6);
-            }
-            else{
-              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
-               OC_DCT_RUN_CAT1C;
-              _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
-               (ogg_uint16_t)((c_sign<<3)+zrun-10);
-            }
-            qc_signed[0]=1;
-            qc_signed[1]=-1;
-            _efrag->dct_coeffs[ci]=(ogg_int16_t)qc_signed[c_sign];
-            zrun=0;
-            /*Skip coding the DCT value below.*/
-            continue;
-          }
-          /*CAT2 run/value tokens: the value is 2-3.*/
-          else if(qc_min<=3&&zrun<=3){
-            if(zrun==1){
-              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
-               OC_DCT_RUN_CAT2A;
-              qc=OC_MINI(3,qc_max);
-              _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
-               (ogg_uint16_t)((c_sign<<1)+qc-2);
-            }
-            else{
-              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
-               OC_DCT_RUN_CAT2B;
-              qc=OC_MINI(3,qc_max);
-              _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
-               (ogg_uint16_t)((c_sign<<2)+(qc-2<<1)+zrun-2);
-            }
-            qc_signed[0]=qc;
-            qc_signed[1]=-qc;
-            _efrag->dct_coeffs[ci]=(ogg_int16_t)qc_signed[c_sign];
-            zrun=0;
-            /*Skip coding the DCT value below.*/
-            continue;
-          }
-          /*The run is too long or the quantized value too large: code them
-             separately.*/
-          else{
-            /*This is stupid: non-short ZRL tokens are never used for run
-               values less than 9, but codewords are reserved for them,
-               wasting bits.
-              Yes, yes, this would've meant a non-constant number of extra
-               bits for this token, but even so.*/
-            if(zrun<=8){
-              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
-               OC_DCT_SHORT_ZRL_TOKEN;
-            }
-            else{
-              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
-               OC_DCT_ZRL_TOKEN;
-            }
-            _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
-             (ogg_uint16_t)(zrun-1);
-            zrun=0;
-          }
-        }
-        /*No zero run, or the run and the qc value are being coded
-           separately.*/
-        switch(qc_min){
-          case 1:{
-            _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
-             (unsigned char)(OC_ONE_TOKEN+c_sign);
-            _efrag->dct_coeffs[ci]=(ogg_int16_t)((-c_sign<<1)+1);
-          }break;
-          case 2:{
-            _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
-             (unsigned char)(OC_TWO_TOKEN+c_sign);
-            _efrag->dct_coeffs[ci]=(ogg_int16_t)((-c_sign<<2)+2);
-          }break;
-          default:{
-            if(qc_min-3<OC_NDCT_VAL_CAT2_SIZE){
-              _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
-               (unsigned char)(OC_DCT_VAL_CAT2+qc_min-3);
-              _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=
-               (ogg_uint16_t)c_sign;
-              qc_signed[0]=qc_min;
-              qc_signed[1]=-qc_min;
-              _efrag->dct_coeffs[ci]=(ogg_int16_t)qc_signed[c_sign];
-            }
-            else{
-              qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
-              for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
-               cati++){
-                qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
-              }
-              /*qc_min can be encoded in this category.
-                Since all DCT values in the category use the same number of
-                 bits, we encode the closest value to qc_max.
-                This is either qc_max itself, if it is in the category's
-                 range, or the largest value in the category.*/
-              qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
-              qc_signed[0]=qc;
-              qc_signed[1]=-qc;
-              _efrag->dct_coeffs[ci]=(ogg_int16_t)qc_signed[c_sign];
-              _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
-               (unsigned char)(OC_DCT_VAL_CAT3+cati);
-              _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=(ogg_uint16_t)
-               ((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
-            }
-          }
-        }
-      }
-    }
-  }
-  /*If there's a trailing zero run, code an EOB token.*/
-  if(zrun>0){
-    int old_tok;
-    int toki;
-    int ebi;
-    tli=64-zrun;
-    toki=_enc->ndct_tokens[tli]-1;
-    if(toki>=0)old_tok=_enc->dct_tokens[tli][toki];
-    else old_tok=-1;
-    /*Try to extend an EOB run.*/
-    switch(old_tok){
-      case OC_DCT_EOB1_TOKEN:
-      case OC_DCT_EOB2_TOKEN:{
-        _enc->dct_tokens[tli][toki]++;
-      }break;
-      case OC_DCT_EOB3_TOKEN:{
-        _enc->dct_tokens[tli][toki]++;
-        _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=0;
-      }break;
-      case OC_DCT_REPEAT_RUN0_TOKEN:{
-        ebi=_enc->nextra_bits[tli]-1;
-        if(_enc->extra_bits[tli][ebi]<3)_enc->extra_bits[tli][ebi]++;
-        else{
-          _enc->dct_tokens[tli][toki]++;
-          _enc->extra_bits[tli][ebi]=0;
-        }
-      }break;
-      case OC_DCT_REPEAT_RUN1_TOKEN:{
-        ebi=_enc->nextra_bits[tli]-1;
-        if(_enc->extra_bits[tli][ebi]<7)_enc->extra_bits[tli][ebi]++;
-        else{
-          _enc->dct_tokens[tli][toki]++;
-          _enc->extra_bits[tli][ebi]=0;
-        }
-      }break;
-      case OC_DCT_REPEAT_RUN2_TOKEN:{
-        ebi=_enc->nextra_bits[tli]-1;
-        if(_enc->extra_bits[tli][ebi]<15)_enc->extra_bits[tli][ebi]++;
-        else{
-          _enc->dct_tokens[tli][toki]++;
-          /*Again stupid: we could encode runs up to 4127, but inexplicably
-             they don't subtract the bottom of the range here, so we can only
-             go to 4095 (unless we want to change the spec to deal with
-             wrap-around).*/
-          _enc->extra_bits[tli][ebi]=32;
-        }
-      }break;
-      case OC_DCT_REPEAT_RUN3_TOKEN:{
-        ebi=_enc->nextra_bits[tli]-1;
-        if(_enc->extra_bits[tli][ebi]<4095){
-          _enc->extra_bits[tli][ebi]++;
-          break;
-        }
-        /*else fall through.*/
-      }
-      /*Start a new EOB run.*/
-      default:{
-        _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=OC_DCT_EOB1_TOKEN;
-      }
-    }
-  }
-  /*Return the number of coefficients before the final zero run.*/
-  return 64-zrun;
-}
-
-static void oc_enc_residual_tokenize(oc_enc_ctx *_enc){
-  int *coded_fragi;
-  int *coded_fragi_end;
-  int    pli;
+/*Merge the final EOB run of each coefficient list with the start of the next,
+   if possible.
+  This assumes that dct_token_offs[0][zzi] is 0 for each zzi, and will
+   increase it as appropriate if an EOB run is merged with that of a previous
+   token index.*/
+void oc_enc_merge_eob_runs(oc_enc_ctx *_enc){
   int    zzi;
-  /*Clear any existing DCT tokens.*/
-  for(zzi=0;zzi<64;zzi++){
-    _enc->ndct_tokens[zzi]=_enc->nextra_bits[zzi]=0;
-    _enc->extra_bits_offs[zzi]=0;
-  }
-  coded_fragi_end=coded_fragi=_enc->state.coded_fragis;
-  for(pli=0;pli<3;pli++){
-    memcpy(_enc->dct_token_offs[pli],_enc->ndct_tokens,
-     sizeof(_enc->dct_token_offs[pli]));
-    coded_fragi_end+=_enc->state.ncoded_fragis[pli];
-    for(;coded_fragi<coded_fragi_end;coded_fragi++){
-      oc_quant_table       *iquants;
-      oc_fragment          *frag;
-      oc_fragment_enc_info *efrag;
-      int                   fragi;
-      int                   qti;
-      int                   nnzc;
-      fragi=*coded_fragi;
-      frag=_enc->state.frags+fragi;
-      efrag=_enc->frinfo+fragi;
-      qti=frag->mbmode!=OC_MODE_INTRA;
-      iquants=_enc->state.dequant_tables[qti][pli];
-      nnzc=oc_enc_frag_quant_tokenize(_enc,efrag,
-       _enc->enquant_tables[qti][pli][frag->qi],iquants[frag->qi]);
-      /*While we're here and things are in cache, reconstruct the quantized
-         fragment.*/
-      oc_state_frag_recon(&_enc->state,frag,pli,efrag->dct_coeffs,nnzc,nnzc,
-       iquants[_enc->state.qis[0]][0],iquants[frag->qi]);
-    }
-  }
-  /*Merge the final EOB run of one coefficient list with the start of the
-     next, if possible.*/
   for(zzi=1;zzi<64;zzi++){
     static const int OC_EOB_RANGE[OC_NDCT_EOB_TOKEN_MAX]={1,1,1,4,8,16,4096};
     static const int OC_EOB_OFFS[OC_NDCT_EOB_TOKEN_MAX]={1,2,3,4,8,16,0};
@@ -1617,33 +726,455 @@
 #endif
 }
 
-/*Encodes the current frame as a key frame.
-  The result is stored in the opb field, and the packet state is updated to
-   indicate a new packet is ready.
-  Return: 0 on success, or a negative value on error.*/
-static int oc_enc_keyframe(oc_enc_ctx *_enc){
-  _enc->state.frame_type=OC_INTRA_FRAME;
-  oc_enc_quant_sel_quality(_enc,1);
-  oc_enc_mark_all_intra(_enc);
-  oc_enc_quant_dc(_enc);
-  oc_enc_residual_tokenize(_enc);
-  oggpackB_reset(&_enc->opb);
-  oc_enc_frame_header_pack(_enc);
-  oc_enc_block_qis_pack(_enc);
-  oc_enc_residual_tokens_pack(_enc);
+
+static void oc_enc_mb_modes_pack(oc_enc_ctx *_enc){
+  const theora_huff_code *codes;
+  const int              *mode_ranks;
+  int                    *coded_mbi;
+  int                    *coded_mbi_end;
+  int                     scheme;
+  scheme=_enc->mode_scheme_chooser.scheme_list[0];
+  oggpackB_write(&_enc->opb,scheme,3);
+  if(scheme==0){
+    int ranks[8];
+    int mi;
+    /*The numbers associated with each mode in the stream are slightly
+       different than what we use in the source.
+      The lookup here converts between the two.*/
+    for(mi=0;mi<OC_NMODES;mi++){
+      ranks[OC_MODE_SCHEMES[6][mi]]=
+       _enc->mode_scheme_chooser.scheme0_ranks[mi];
+    }
+    for(mi=0;mi<OC_NMODES;mi++)oggpackB_write(&_enc->opb,ranks[mi],3);
+  }
+  codes=_enc->mode_scheme_chooser.mode_codes[scheme];
+  mode_ranks=_enc->mode_scheme_chooser.mode_ranks[scheme];
+  coded_mbi=_enc->state.coded_mbis;
+  coded_mbi_end=coded_mbi+_enc->state.ncoded_mbis;
+  for(;coded_mbi<coded_mbi_end;coded_mbi++){
+    const theora_huff_code *code;
+    oc_mb                  *mb;
+    mb=_enc->state.mbs+*coded_mbi;
+    code=codes+mode_ranks[mb->mode];
+    oggpackB_write(&_enc->opb,code->pattern,code->nbits);
+  }
+}
+
+static void oc_enc_mv_pack(oc_enc_ctx *_enc,int _dx,int _dy){
+  const theora_huff_code *code;
+  code=OC_MV_CODES[_enc->mv_scheme]+_dx+31;
+  oggpackB_write(&_enc->opb,code->pattern,code->nbits);
+  code=OC_MV_CODES[_enc->mv_scheme]+_dy+31;
+  oggpackB_write(&_enc->opb,code->pattern,code->nbits);
+}
+
+static void oc_enc_mvs_pack(oc_enc_ctx *_enc){
+  int *coded_mbi;
+  int *coded_mbi_end;
+  oggpackB_write(&_enc->opb,_enc->mv_scheme,1);
+  coded_mbi=_enc->state.coded_mbis;
+  coded_mbi_end=coded_mbi+_enc->state.ncoded_mbis;
+  for(;coded_mbi<coded_mbi_end;coded_mbi++){
+    oc_mb          *mb;
+    oc_mb_enc_info *mbinfo;
+    int             mbi;
+    mbi=*coded_mbi;
+    mb=_enc->state.mbs+mbi;
+    switch(mb->mode){
+      case OC_MODE_INTER_MV:
+      case OC_MODE_GOLDEN_MV:{
+        int which_frame;
+        which_frame=OC_FRAME_FOR_MODE[mb->mode];
+        mbinfo=_enc->mbinfo+mbi;
+        oc_enc_mv_pack(_enc,mbinfo->mvs[0][which_frame][0],
+         mbinfo->mvs[0][which_frame][1]);
+      }break;
+      case OC_MODE_INTER_MV_FOUR:{
+        int bi;
+        mbinfo=_enc->mbinfo+mbi;
+        for(bi=0;bi<4;bi++){
+          int fragi;
+          fragi=mb->map[0][bi];
+          if(fragi>=0&&_enc->state.frags[fragi].coded){
+            oc_enc_mv_pack(_enc,mbinfo->bmvs[bi][0],mbinfo->bmvs[bi][1]);
+          }
+        }
+      }break;
+    }
+  }
+}
+
+static void oc_enc_enable_default_mode(oc_enc_ctx *_enc){
+  /*TODO: Right now we always use VBR mode.
+    When a CBR mode is available, we should use that by default if the user
+     specifies a bitrate, but not a quality, in the theora_info struct.*/
+  if(_enc->vbr==NULL)_enc->vbr=oc_enc_vbr_alloc(_enc);
+  oc_enc_vbr_enable(_enc->vbr,NULL);
+}
+
+/*A pipeline stage for copying uncoded fragments.*/
+
+static int oc_copy_pipe_start(oc_enc_pipe_stage *_stage){
+  int pli;
+  for(pli=0;pli<3;pli++){
+    _stage->y_procd[pli]=0;
+    _stage->enc->uncoded_fragii[pli]=0;
+  }
+  return _stage->next!=NULL?(*_stage->next->pipe_start)(_stage->next):0;
+}
+
+static int oc_copy_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  int        *uncoded_fragis;
+  oc_enc_ctx *enc;
+  int         pli;
+  enc=_stage->enc;
+  uncoded_fragis=enc->state.uncoded_fragis;
+  for(pli=0;pli<3;pli++){
+    int y_avail;
+    y_avail=_y_avail[pli];
+    /*Process in units of super block rows, with the possible exception of the
+       last, partial super block row.*/
+    if(y_avail<enc->state.input[pli].height)y_avail&=~31;
+    if(y_avail>_stage->y_procd[pli]){
+      if(enc->uncoded_fragii[pli]<enc->state.nuncoded_fragis[pli]){
+        oc_fragment_plane *fplane;
+        int                fragi_end;
+        int                fragii;
+        fplane=enc->state.fplanes+pli;
+        fragi_end=(y_avail>>3)*fplane->nhfrags+fplane->froffset;
+        /*Count the uncoded fragments that belong in these super block rows.*/
+        for(fragii=enc->uncoded_fragii[pli];
+         fragii<enc->state.nuncoded_fragis[pli]&&
+         *(uncoded_fragis-fragii)<fragi_end;fragii++);
+        /*And copy them.*/
+        oc_state_frag_copy(&enc->state,uncoded_fragis-fragii,
+         fragii-enc->uncoded_fragii[pli],OC_FRAME_SELF,OC_FRAME_PREV,pli);
+        enc->uncoded_fragii[pli]=fragii;
+      }
+      _stage->y_procd[pli]=y_avail;
+      if(_stage->next!=NULL){
+        int ret;
+        ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+        if(ret<0)return ret;
+      }
+    }
+    uncoded_fragis-=enc->state.nuncoded_fragis[pli];
+  }
+  return 0;
+}
+
+static int oc_copy_pipe_end(oc_enc_pipe_stage *_stage){
+  return _stage->next!=NULL?(*_stage->next->pipe_end)(_stage->next):0;
+}
+
+/*Initialize the uncoded fragment copying stage of the pipeline.
+  _enc: The encoding context.*/
+static void oc_copy_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->enc=_enc;
+  _stage->next=NULL;
+  _stage->pipe_start=oc_copy_pipe_start;
+  _stage->pipe_proc=oc_copy_pipe_process;
+  _stage->pipe_end=oc_copy_pipe_end;
+}
+
+/*A pipeline stage for applying the loop filter.*/
+
+static int oc_loop_pipe_start(oc_enc_pipe_stage *_stage){
+  oc_enc_ctx *enc;
+  int         pli;
+  enc=_stage->enc;
+  for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+  enc->loop_filter_enabled=enc->ncoded_frags>0&&
+   !oc_state_loop_filter_init(&enc->state,enc->bounding_values+256);
+  return _stage->next!=NULL?(*_stage->next->pipe_start)(_stage->next):0;
+}
+
+static int oc_loop_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  oc_enc_ctx *enc;
+  int         pli;
+  enc=_stage->enc;
+  if(enc->loop_filter_enabled){
+    int refi;
+    refi=enc->state.ref_frame_idx[OC_FRAME_SELF];
+    for(pli=0;pli<3;pli++){
+      int delay;
+      int fragy0;
+      int fragy_end;
+      fragy0=_stage->y_procd[pli]+1>>3;
+      /*Add a 2 pixel delay for the vertical filter, except in the last row.*/
+      delay=(_y_avail[pli]<enc->state.ref_frame_bufs[refi][pli].height);
+      fragy_end=_y_avail[pli]-(delay<<1)>>3;
+      if(fragy_end>fragy0){
+        oc_state_loop_filter_frag_rows(&enc->state,enc->bounding_values+256,
+         refi,pli,fragy0,fragy_end);
+        /*We also add a 1 pixel delay to the next stage, since the vertical
+           filter for the next fragment row can still change the last row of
+           pixels from this fragment row.*/
+        _stage->y_procd[pli]=(fragy_end<<3)-delay;
+        if(_stage->next!=NULL){
+          int ret;
+          ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+          if(ret<0)return ret;
+        }
+      }
+    }
+  }
+  else{
+    for(pli=0;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
+    if(_stage->next!=NULL){
+      return (*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+    }
+  }
+  return 0;
+}
+
+static int oc_loop_pipe_end(oc_enc_pipe_stage *_stage){
+  return _stage->next!=NULL?(*_stage->next->pipe_end)(_stage->next):0;
+}
+
+/*Initialize the loop filter stage of the pipeline.
+  _enc: The encoding context.*/
+static void oc_loop_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->enc=_enc;
+  _stage->next=NULL;
+  _stage->pipe_start=oc_loop_pipe_start;
+  _stage->pipe_proc=oc_loop_pipe_process;
+  _stage->pipe_end=oc_loop_pipe_end;
+}
+
+/*A pipeline stage for filling in the image border.*/
+
+static int oc_fill_pipe_start(oc_enc_pipe_stage *_stage){
+  int pli;
+  for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+  return _stage->next!=NULL?(*_stage->next->pipe_start)(_stage->next):0;
+}
+
+static int oc_fill_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  int pli;
+  if(_stage->enc->ncoded_frags>0){
+    oc_theora_state *state;
+    int              refi;
+    state=&_stage->enc->state;
+    refi=state->ref_frame_idx[OC_FRAME_SELF];
+    for(pli=0;pli<3;pli++){
+      if(_stage->y_procd[pli]<_y_avail[pli]){
+        oc_state_borders_fill_rows(state,refi,pli,_stage->y_procd[pli],
+         _y_avail[pli]);
+        _stage->y_procd[pli]=_y_avail[pli];
+        if(_stage->next!=NULL){
+          int ret;
+          ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+          if(ret<0)return ret;
+        }
+      }
+    }
+  }
+  else{
+    for(pli=0;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
+    if(_stage->next!=NULL){
+      return (*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+    }
+  }
+  return 0;
+}
+
+static int oc_fill_pipe_end(oc_enc_pipe_stage *_stage){
+  oc_theora_state *state;
+  int              refi;
+  int              pli;
+  state=&_stage->enc->state;
+  refi=state->ref_frame_idx[OC_FRAME_SELF];
+  for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(state,refi,pli);
+  return _stage->next!=NULL?(*_stage->next->pipe_end)(_stage->next):0;
+}
+
+/*Initialize the border filling stage of the pipeline.
+  _enc: The encoding context.*/
+static void oc_fill_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->enc=_enc;
+  _stage->next=NULL;
+  _stage->pipe_start=oc_fill_pipe_start;
+  _stage->pipe_proc=oc_fill_pipe_process;
+  _stage->pipe_end=oc_fill_pipe_end;
+}
+
+/*A pipeline stage for storing the encoded frame contents in a packet.*/
+
+static int oc_pack_pipe_start(oc_enc_pipe_stage *_stage){
+  int pli;
+  for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+  return 0;
+}
+
+static int oc_pack_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  int pli;
+  for(pli=0;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
+  return 0;
+}
+
+static int oc_pack_pipe_end(oc_enc_pipe_stage *_stage){
+  oc_enc_ctx *enc;
+  int         ret;
+  if(_stage->next!=NULL){
+    ret=(*_stage->next->pipe_start)(_stage->next);
+    if(ret<0)return ret;
+  }
+  enc=_stage->enc;
+  oggpackB_reset(&enc->opb);
+  /*Only proceed if we have some coded blocks.
+    No coded blocks -> dropped frame -> 0 byte packet.*/
+  if(enc->ncoded_frags>0){
+    oc_enc_frame_header_pack(enc);
+    if(enc->state.frame_type==OC_INTER_FRAME){
+      oggpackB_writecopy(&enc->opb,
+       oggpackB_get_buffer(&enc->opb_coded_flags),
+       oggpackB_bits(&enc->opb_coded_flags));
+      oc_enc_mb_modes_pack(enc);
+      oc_enc_mvs_pack(enc);
+    }
+    oc_enc_block_qis_pack(enc);
+    /*Pack the quantized DCT coefficients.*/
+    oc_enc_residual_tokens_pack(enc);
+  }
   /*Success: Mark the packet as ready to be flushed.*/
-  _enc->packet_state=OC_PACKET_READY;
+  enc->packet_state=OC_PACKET_READY;
+  if(_stage->next!=NULL){
+    ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+    if(ret<0)return ret;
+    return (*_stage->next->pipe_end)(_stage->next);
+  }
   return 0;
 }
 
+/*Initialize the packet assembly stage of the pipeline.
+  _enc: The encoding context.*/
+static void oc_pack_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->enc=_enc;
+  _stage->next=NULL;
+  _stage->pipe_start=oc_pack_pipe_start;
+  _stage->pipe_proc=oc_pack_pipe_process;
+  _stage->pipe_end=oc_pack_pipe_end;
+}
 
 
+static int oc_enc_init(oc_enc_ctx *_enc,const theora_info *_info){
+  int ret;
+  /*Initialize the shared encoder/decoder state.*/
+  ret=oc_state_init(&_enc->state,_info);
+  if(ret<0)return ret;
+  _enc->block_coded_flags=_ogg_calloc(_enc->state.nfrags,
+   sizeof(_enc->block_coded_flags[0]));
+  /*Initialize our packet buffers.*/
+  oggpackB_writeinit(&_enc->opb);
+  oggpackB_writeinit(&_enc->opb_coded_flags);
+  /*Allocate and initialize the encoder-specific fragment and macro block
+     information, as well as the DCT token and extra bits storage.*/
+  _enc->frinfo=_ogg_calloc(_enc->state.nfrags,
+   sizeof(_enc->frinfo[0]));
+  _enc->mbinfo=_ogg_calloc(_enc->state.nmbs,sizeof(_enc->mbinfo[0]));
+  _enc->dct_tokens=(unsigned char **)oc_malloc_2d(64,
+   _enc->state.nfrags,sizeof(_enc->dct_tokens[0][0]));
+  _enc->extra_bits=(ogg_uint16_t **)oc_malloc_2d(64,
+   _enc->state.nfrags,sizeof(_enc->extra_bits[0][0]));
+  oc_enc_init_mbinfo(_enc);
+  /*Do one-time mode scheme chooser initialization.*/
+  oc_mode_scheme_chooser_init(&_enc->mode_scheme_chooser);
+  /*Set the maximum distance between key frames.*/
+  _enc->keyframe_frequency_force=1<<_enc->state.info.keyframe_granule_shift;
+  /*Initialize the motion compensation plug-in.
+    No importance map or psychovisual model is allocated in this function.*/
+  _enc->mcenc=oc_mcenc_alloc(_enc);
+  /*Reset the packet-out state machine.*/
+  _enc->packet_state=OC_PACKET_INFO_HDR;
+  /*Mark us as not VP3-compatible.*/
+  _enc->vp3_compatible=0;
+  /*Set the Huffman codes and quantization parameters to the defaults.*/
+  memcpy(_enc->huff_codes,OC_VP31_HUFF_CODES,sizeof(_enc->huff_codes));
+  oc_enc_set_quant_params(_enc,NULL);
+  /*Initialize the static pipeline stages and chain copy->loop->fill.*/
+  oc_fdct_pipe_init(&_enc->fdct_pipe,_enc);
+  oc_copy_pipe_init(&_enc->copy_pipe,_enc);
+  oc_loop_pipe_init(&_enc->loop_pipe,_enc);
+  _enc->copy_pipe.next=&_enc->loop_pipe;
+  oc_fill_pipe_init(&_enc->fill_pipe,_enc);
+  _enc->loop_pipe.next=&_enc->fill_pipe;
+  oc_pack_pipe_init(&_enc->pack_pipe,_enc);
+  /*Delay initialization of the encoding pipeline until the application sets
+     an encoding mode or the first frame is submitted.*/
+  _enc->pipe=NULL;
+  _enc->vbr=NULL;
+  return 0;
+}
+
+static void oc_enc_clear(oc_enc_ctx *_enc){
+  oc_enc_vbr_free(_enc->vbr);
+  oc_mcenc_free(_enc->mcenc);
+  oc_free_2d(_enc->extra_bits);
+  oc_free_2d(_enc->dct_tokens);
+  _ogg_free(_enc->mbinfo);
+  _ogg_free(_enc->frinfo);
+  _ogg_free(_enc->block_coded_flags);
+  oc_state_clear(&_enc->state);
+}
+
+
+
+/*A default implementation of set_speed, to use when the encoding mode is not
+   configurable.
+  It does nothing.
+  _speed: The encoding speed to use.*/
+void oc_enc_set_speed_null(oc_enc_ctx *_enc,int _speed){}
+
+/*Computes the SAD value of a fragment in the input image with respect to its
+   motion compensated predictor.
+  _frag:     The fragment to find the SAD of.
+  _dx:       The X component of the motion vector.
+  _dy:       The Y component of the motion vector.
+  _pli:      The color plane the fragment belongs to.
+  _frame:    The reference frame to predict from.*/
+int oc_enc_frag_sad(oc_enc_ctx *_enc,oc_fragment *_frag,int _dx,
+ int _dy,int _pli,int _frame){
+  int cur_ystride;
+  int ref_ystride;
+  int ref_framei;
+  int mvoffset0;
+  int mvoffset1;
+  cur_ystride=_enc->state.input[_pli].ystride;
+  ref_framei=_enc->state.ref_frame_idx[_frame];
+  ref_ystride=_enc->state.ref_frame_bufs[ref_framei][_pli].ystride;
+  if(oc_state_get_mv_offsets(&_enc->state,&mvoffset0,&mvoffset1,_dx,_dy,
+   ref_ystride,_pli)>1){
+    if(_frag->border==NULL){
+      return oc_sad8_halfpel(_frag->buffer[OC_FRAME_IO],cur_ystride,
+       _frag->buffer[ref_framei]+mvoffset0,
+       _frag->buffer[ref_framei]+mvoffset1,ref_ystride);
+    }
+    else{
+      return oc_sad8_halfpel_border(_frag->buffer[OC_FRAME_IO],cur_ystride,
+       _frag->buffer[ref_framei]+mvoffset0,
+       _frag->buffer[ref_framei]+mvoffset1,ref_ystride,_frag->border->mask);
+    }
+  }
+  else{
+    if(_frag->border==NULL){
+      return oc_sad8_fullpel(_frag->buffer[OC_FRAME_IO],cur_ystride,
+       _frag->buffer[ref_framei]+mvoffset0,ref_ystride);
+    }
+    else{
+      return oc_sad8_fullpel_border(_frag->buffer[OC_FRAME_IO],
+       cur_ystride,_frag->buffer[ref_framei]+mvoffset0,ref_ystride,
+       _frag->border->mask);
+    }
+  }
+}
+
 /*Writes the bit flags for whether or not each super block is partially coded
    or not.
   These flags are run-length encoded, with the flag value alternating between
    each run.
   Return: The number of bits written.*/
-static int oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc){
+int oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb){
   oc_sb    *sb;
   oc_sb    *sb_end;
   unsigned  flag;
@@ -1651,7 +1182,7 @@
   int       ret;
   /*Write the list of partially coded super block flags.*/
   flag=_enc->state.sbs[0].coded_partially;
-  oggpackB_write(&_enc->opb_coded_flags,flag,1);
+  oggpackB_write(_opb,flag,1);
   ret=1;
   sb=_enc->state.sbs;
   sb_end=sb+_enc->state.nsbs;
@@ -1668,18 +1199,18 @@
        invalid code for longer runs.*/
     /*First, encode runs until we have 4129 or fewer sbs left.*/
     while(run_count>4129){
-      ret+=oc_sb_run_pack(&_enc->opb_coded_flags,4129);
+      ret+=oc_sb_run_pack(_opb,4129);
       run_count-=4129;
-      oggpackB_write(&_enc->opb_coded_flags,flag,1);
+      oggpackB_write(_opb,flag,1);
       ret++;
     }
     /*Encode the last run.*/
-    ret+=oc_sb_run_pack(&_enc->opb_coded_flags,run_count);
+    ret+=oc_sb_run_pack(_opb,run_count);
     flag=!flag;
     /*If there are more sbs to come, and we had a run of 4129 exactly,
        encode the flipped bit.*/
     if(run_count==4129&&sb<sb_end){
-      oggpackB_write(&_enc->opb_coded_flags,flag,1);
+      oggpackB_write(_opb,flag,1);
       ret++;
     }
   }
@@ -1691,7 +1222,7 @@
   These flags are run-length encoded, with the flag value altenating between
    each run.
   Return: The number of bits written.*/
-static int oc_enc_coded_sb_flags_pack(oc_enc_ctx *_enc){
+int oc_enc_coded_sb_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb){
   oc_sb    *sb;
   oc_sb    *sb_end;
   unsigned  flag;
@@ -1706,7 +1237,7 @@
     if(!sb->coded_partially)break;
   }
   flag=sb->coded_fully;
-  oggpackB_write(&_enc->opb_coded_flags,flag,1);
+  oggpackB_write(_opb,flag,1);
   ret=1;
   while(sb<sb_end){
     for(run_count=0;sb<sb_end;sb++){
@@ -1722,16 +1253,16 @@
        invalid code for longer runs.*/
     /*First, encode runs until we have 4129 or fewer sbs left.*/
     while(run_count>4129){
-      ret+=oc_sb_run_pack(&_enc->opb_coded_flags,4129);
+      ret+=oc_sb_run_pack(_opb,4129);
       run_count-=4129;
-      oggpackB_write(&_enc->opb_coded_flags,flag,1);
+      oggpackB_write(_opb,flag,1);
       ret++;
     }
     /*Encode the last run.*/
-    ret+=oc_sb_run_pack(&_enc->opb_coded_flags,run_count);
+    ret+=oc_sb_run_pack(_opb,run_count);
     flag=!flag;
     if(run_count==4129&&sb<sb_end){
-      oggpackB_write(&_enc->opb_coded_flags,flag,1);
+      oggpackB_write(_opb,flag,1);
       ret++;
     }
   }
@@ -1743,14 +1274,14 @@
   These flags are run-length encoded, with the flag value alternating between
    each run.
   Return: The number of bits written.*/
-static int oc_enc_coded_block_flags_pack(oc_enc_ctx *_enc){
+int oc_enc_coded_block_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb){
   int flag;
   int run_count;
   int bli;
   int ret;
   if(_enc->nblock_coded_flags<=0)return 0;
   flag=_enc->block_coded_flags[0];
-  oggpackB_write(&_enc->opb_coded_flags,flag,1);
+  oggpackB_write(_opb,flag,1);
   ret=1;
   for(bli=0;bli<_enc->nblock_coded_flags;){
     for(run_count=0;bli<_enc->nblock_coded_flags;bli++){
@@ -1763,580 +1294,15 @@
        or its complement).
       This avoids the nastiness of the VLC not letting us encode runs long
        enough like above.*/
-    ret+=oc_block_run_pack(&_enc->opb_coded_flags,run_count);
+    ret+=oc_block_run_pack(_opb,run_count);
     flag=!flag;
   }
   return ret;
 }
 
-/*Marks each fragment as coded or not, based on the coefficient-level
-   thresholds computed in the psychovisual stage.
-  The MB mode of the fragments are not set, as they will be computed in
-   oc_enc_choose_mbmodes().
-  This also builds up the coded fragment and uncoded fragment lists.
-  The coded MB list is not built up.
-  That is done during mode decision.*/
-static void oc_enc_mark_coded(oc_enc_ctx *_enc){
-  oc_sb *sb;
-  oc_sb *sb_end;
-  int    pli;
-  int    bli;
-  int    ncoded_fragis;
-  int    prev_ncoded_fragis;
-  int    nuncoded_fragis;
-  int    prev_nuncoded_fragis;
-  _enc->nblock_coded_flags=bli=0;
-  prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0;
-  sb=sb_end=_enc->state.sbs;
-  for(pli=0;pli<3;pli++){
-    const oc_fragment_plane *fplane;
-    int                      ystride;
-    int                      prev_refi;
-    fplane=_enc->state.fplanes+pli;
-    sb_end+=fplane->nsbs;
-    prev_refi=_enc->state.ref_frame_idx[OC_FRAME_PREV];
-    ystride=_enc->state.ref_frame_bufs[prev_refi][pli].ystride;
-    for(;sb<sb_end;sb++){
-      int quadi;
-      sb->coded_fully=1;
-      sb->coded_partially=0;
-      for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
-        int bi;
-        for(bi=0;bi<4;bi++){
-          int fragi;
-          fragi=sb->map[quadi][bi];
-          if(fragi>=0){
-            oc_fragment *frag;
-            int          flag;
-            frag=_enc->state.frags+fragi;
-            if(frag->invalid){
-              frag->coded=0;
-              *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
-            }
-            else{
-              oc_fragment_enc_info *efrag;
-              ogg_int16_t           dct_buf[64];
-              int                   ci;
-              /*Check to see if the fragment can be skipped.
-                It is assumed that a skipped fragment always takes fewer bits
-                 than a coded fragment, though this may not necessarily be true.
-                A single skipped fragment could take up to 34 bits to encode
-                 its location in the RLE scheme Theora uses */
-              oc_enc_frag_intra_fdct(_enc,frag,dct_buf,ystride,prev_refi);
-              efrag=_enc->frinfo+fragi;
-              /*The comparison against OC_DC_QUANT_MIN and OC_AC_QUANT_MIN
-                 ensures we mark a fragment as skipped if it would be quantized
-                 to all zeros in OC_MODE_INTER_NOMV.
-                These minimum quantizers represent the maximum quality the
-                 format is capable of, and can be larger than our tolerances.
-                The minimum for INTER modes is twice the minimum for INTRA
-                 modes, so technically if the tolerances are below this
-                 threshold, we might be able to do a better job representing
-                 this fragment by coding it in INTRA mode.
-                But the number of extra bits required to do that would be
-                 ridiculous, so we give up our devotion to minimum quality just
-                 this once.
-
-                Note: OC_DC_QUANT_MIN[0] should actually be
-                 OC_DC_QUANT_MIN[1]>>1, but in this case those are
-                 equivalent.*/
-              ci=0;
-              if((unsigned)abs(dct_buf[0]-efrag->dct_coeffs[0])<=
-               OC_MAXI(efrag->tols[0],OC_DC_QUANT_MIN[0])){
-                for(ci++;ci<64;ci++){
-                  if((unsigned)abs(dct_buf[ci]-efrag->dct_coeffs[ci])>
-                   OC_MAXI(efrag->tols[ci],OC_AC_QUANT_MIN[0])){
-                    break;
-                  }
-                }
-              }
-              if(ci>=64){
-                frag->coded=0;
-                *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
-              }
-              else{
-                frag->coded=1;
-                _enc->state.coded_fragis[ncoded_fragis++]=fragi;
-              }
-            }
-            flag=frag->coded;
-            sb->coded_fully&=flag;
-            sb->coded_partially|=flag;
-            _enc->block_coded_flags[bli++]=(char)flag;
-          }
-        }
-      }
-      /*If this is a partially coded super block, keep the entries just added
-         to the code block flag list.*/
-      if(!sb->coded_fully&&sb->coded_partially){
-        _enc->nblock_coded_flags=bli;
-      }
-      /*Otherwise, discard these entries from the list, as they are
-         implicit.*/
-      else{
-        sb->coded_partially=0;
-        bli=_enc->nblock_coded_flags;
-      }
-    }
-    _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
-    prev_ncoded_fragis=ncoded_fragis;
-    _enc->state.nuncoded_fragis[pli]=nuncoded_fragis-prev_nuncoded_fragis;
-    prev_nuncoded_fragis=nuncoded_fragis;
-  }
-}
-
-/*Selects an appropriate coding mode for each macro block.
-  A mode is chosen for the macro blocks with at least one coded fragment.
-  A bit cost estimate for coding the frame with the selected modes is made,
-   and a similar estimate is made for coding the frame as a key frame.
-  These estimates are used to select the optimal frame type.
-  Return: The frame type to encode with: OC_INTER_FRAME or OC_INTRA_FRAME.*/
-static int oc_enc_choose_mbmodes(oc_enc_ctx *_enc){
-  oc_set_chroma_mvs_func  set_chroma_mvs;
-  oc_fragment_enc_info   *efrag;
-  oc_fragment            *frag;
-  oc_mb                  *mb;
-  oc_mb_enc_info         *mbinfo;
-  char                    last_mv[2][2];
-  int                    *uncoded_fragi;
-  int                    *uncoded_fragi_end;
-  int                     best_qii;
-  int                     qii;
-  int                     qi;
-  int                     pli;
-  int                     mbi;
-  int                     fragi;
-  int                     ci;
-  int                     nmbs;
-  int                     mvbitsa;
-  int                     mvbitsb;
-  int                     intra_bits;
-  int                     inter_bits;
-  nmbs=_enc->state.nmbs;
-  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
-  oc_mode_scheme_chooser_reset(&_enc->mode_scheme_chooser);
-  memset(last_mv,0,sizeof(last_mv));
-  mbinfo=_enc->mbinfo;
-  mvbitsa=mvbitsb=0;
-  inter_bits=2+7*_enc->state.nqis-(_enc->state.nqis==3);
-  intra_bits=inter_bits+3;
-  _enc->state.ncoded_mbis=0;
-  for(mbi=0;mbi<nmbs;mbi++){
-    mb=_enc->state.mbs+mbi;
-    if(mb->mode!=OC_MODE_INVALID){
-      oc_fragment_enc_info *efrag;
-      char                  bmvs[2][4][2];
-      char                  mbmv[2];
-      int                   err[OC_NMODES][12];
-      int                   bits[OC_NMODES];
-      int                   coded[13];
-      int                   frag_qii[12][2][2];
-      int                   ncoded;
-      int                   ncoded_luma;
-      int                   mapii;
-      int                   mapi;
-      int                   modei;
-      int                   codedi;
-      int                   mbintrabits;
-      int                   mbpmvbitsa;
-      int                   mbgmvbitsa;
-      int                   mb4mvbitsa;
-      int                   mb4mvbitsb;
-      int                   fti;
-      int                   qti;
-      int                   bi;
-      mbinfo=_enc->mbinfo+mbi;
-      /*Build up a list of coded fragments.*/
-      ncoded=0;
-      for(mapii=0;mapii<OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];mapii++){
-        mapi=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt][mapii];
-        fragi=mb->map[mapi>>2][mapi&3];
-        if(fragi>=0&&_enc->state.frags[fragi].coded)coded[ncoded++]=mapi;
-      }
-      /*If we don't find any, mark this MB not coded and move on.*/
-      if(ncoded<=0){
-        mb->mode=OC_MODE_NOT_CODED;
-        /*Don't bother to do a MV search against the golden frame.
-          Just re-use the last vector, which should match well since the
-           contents of the MB haven't changed much.*/
-        mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[1][OC_FRAME_GOLD][0];
-        mbinfo->mvs[0][OC_FRAME_GOLD][1]=mbinfo->mvs[1][OC_FRAME_GOLD][1];
-        continue;
-      }
-      /*Count the number of coded blocks that are luma blocks, and replace the
-         block MVs for not-coded blocks with (0,0).*/
-      memcpy(bmvs[0],mbinfo->bmvs,sizeof(bmvs[0]));
-      /*Mark the end of the list so we don't go past it below.*/
-      coded[ncoded]=-1;
-      for(mapi=ncoded_luma=0;mapi<4;mapi++){
-        if(coded[ncoded_luma]==mapi)ncoded_luma++;
-        else bmvs[0][mapi][0]=bmvs[0][mapi][1]=0;
-      }
-      /*Select a qi value for each coded fragment for each frame type and
-         quantizer type.*/
-      for(codedi=0;codedi<ncoded;codedi++){
-        mapi=coded[codedi];
-        efrag=_enc->frinfo+mb->map[mapi>>2][mapi&3];
-        for(fti=0;fti<2;fti++)for(qti=0;qti<=fti;qti++){
-          best_qii=0;
-          for(qii=1;qii<_enc->nqis[fti];qii++){
-            if(efrag->qi_min[qti]<=_enc->qis[fti][qii]&&
-             (_enc->qis[fti][qii]<_enc->qis[fti][best_qii]||
-             _enc->qis[fti][best_qii]<efrag->qi_min[qti])){
-              best_qii=qii;
-            }
-          }
-          frag_qii[codedi][fti][qti]=best_qii;
-        }
-      }
-      /*Special case: If no luma blocks are coded, but some chroma blocks are,
-         then the macro block defaults to OC_MODE_INTER_NOMV, and no mode need
-         be explicitly coded for it.*/
-      if(ncoded_luma<=0){
-        mb->mode=OC_MODE_NOT_CODED;
-        /*Don't bother to do a MV search against the golden frame.*/
-        mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[0][OC_FRAME_GOLD][1]=0;
-        /*We do collect bitrate stats for frame type decision.*/
-        mbintrabits=bits[OC_MODE_INTER_NOMV]=0;
-        for(codedi=0;codedi<ncoded;codedi++){
-          mapi=coded[codedi];
-          pli=mapi>>2;
-          fragi=mb->map[pli][mapi&3];
-          frag=_enc->state.frags+fragi;
-          efrag=_enc->frinfo+fragi;
-          /*Set the MB mode and MV in the fragment.*/
-          frag->mbmode=OC_MODE_INTER_NOMV;
-          frag->mv[0]=frag->mv[1]=0;
-          /*Calculate the bitrate estimates.*/
-          err[OC_MODE_INTRA][mapi]=0;
-          for(ci=1;ci<64;ci++){
-            err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
-          }
-          err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
-           OC_FRAME_PREV);
-          qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
-          mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
-           OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
-          qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
-          bits[OC_MODE_INTER_NOMV]+=OC_RES_BITRATES[qi][pli][
-           OC_MODE_INTER_NOMV][OC_MINI(err[OC_MODE_INTER_NOMV][mapi]>>6,15)];
-          /*Also mark this fragment with the selected INTER qi.
-            It will be reset if we eventually code this as an INTRA frame.*/
-#if defined(OC_BITRATE_STATS)
-          efrag->eerror=err[OC_MODE_INTER_NOMV][mapi];
-#endif
-          efrag->qii=(unsigned char)frag_qii[codedi][OC_INTER_FRAME][1];
-          frag->qi=qi;
-        }
-        intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
-        inter_bits+=bits[OC_MODE_INTER_NOMV]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
-        continue;
-      }
-      /*Otherwise, add this to the coded MB list.*/
-      _enc->state.coded_mbis[_enc->state.ncoded_mbis++]=mbi;
-      /*Compute the chroma MVs for the 4MV mode.*/
-      (*set_chroma_mvs)(bmvs[1],bmvs[0]);
-      /*Do a MV search against the golden frame.*/
-      oc_mcenc_search_1mv(_enc->mcenc,mb-_enc->state.mbs,OC_FRAME_GOLD);
-      /*We are now ready to do mode decision for this macro block.
-        Mode decision is done by exhaustively examining all potential choices.
-        Since we use a minimum-quality encoding strategy, this amounts to
-         simply selecting the mode which uses the smallest number of bits,
-         since the minimum quality will be met in any mode.
-        Obviously, doing the motion compensation, fDCT, tokenization, and then
-         counting the bits each token uses is computationally expensive.
-        Theora's EOB runs can also split the cost of these tokens across
-         multiple fragments, and naturally we don't know what the optimal
-         choice of Huffman codes will be until we know all the tokens we're
-         going to encode in all the fragments.
-
-        So we use a simple approach to estimating the bit cost of each mode
-         based upon the SAD value of the residual.
-        The mathematics behind the technique are outlined by Kim \cite{Kim03},
-         but the process is very simple.
-        For each quality index and SAD value, we have a table containing the
-         average number of bits needed to code a fragment.
-        The SAD values are placed into a small number of bins (currently 16).
-        The bit counts are obtained by examining actual encoded frames, with
-         optimal Huffman codes selected and EOB bits appropriately divided
-         among all the blocks they involve.
-        A separate QIxSAD table is kept for each mode and color plane.
-        It may be possible to combine many of these, but only experimentation
-         will tell which ones truly represent the same distribution.
-
-        @ARTICLE{Kim03,
-          author="Hyun Mun Kim",
-          title="Adaptive Rate Control Using Nonlinear Regression",
-          journal="IEEE Transactions on Circuits and Systems for Video
-           Technology",
-          volume=13,
-          number=5,
-          pages="432--439",
-          month="May",
-          year=2003
-        }*/
-      memset(bits,0,sizeof(bits));
-      mbintrabits=0;
-      /*Find the SAD values for each coded fragment for each possible mode.*/
-      for(codedi=0;codedi<ncoded;codedi++){
-        mapi=coded[codedi];
-        pli=mapi>>2;
-        bi=mapi&3;
-        fragi=mb->map[pli][bi];
-        frag=_enc->state.frags+fragi;
-        efrag=_enc->frinfo+fragi;
-        err[OC_MODE_INTRA][mapi]=0;
-        for(ci=1;ci<64;ci++){
-          err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
-        }
-        err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
-         OC_FRAME_PREV);
-        err[OC_MODE_INTER_MV][mapi]=oc_enc_frag_sad(_enc,frag,
-         mbinfo->mvs[0][OC_FRAME_PREV][0],mbinfo->mvs[0][OC_FRAME_PREV][1],
-         pli,OC_FRAME_PREV);
-        err[OC_MODE_INTER_MV_LAST][mapi]=oc_enc_frag_sad(_enc,frag,
-         last_mv[0][0],last_mv[0][1],pli,OC_FRAME_PREV);
-        err[OC_MODE_INTER_MV_LAST2][mapi]=oc_enc_frag_sad(_enc,frag,
-         last_mv[1][0],last_mv[1][1],pli,OC_FRAME_PREV);
-        err[OC_MODE_INTER_MV_FOUR][mapi]=oc_enc_frag_sad(_enc,frag,
-         bmvs[!!pli][bi][0],bmvs[!!pli][bi][1],pli,OC_FRAME_PREV);
-        err[OC_MODE_GOLDEN_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,
-         0,0,pli,OC_FRAME_GOLD);
-        err[OC_MODE_GOLDEN_MV][mapi]=oc_enc_frag_sad(_enc,frag,
-         mbinfo->mvs[0][OC_FRAME_GOLD][0],mbinfo->mvs[0][OC_FRAME_GOLD][1],
-         pli,OC_FRAME_GOLD);
-        /*Using these distortion values, estimate the number of bits needed to
-           code this fragment in each mode.*/
-        qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
-        mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
-         OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
-        qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][0]];
-        bits[OC_MODE_INTRA]+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
-         OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
-        qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
-        for(modei=OC_MODE_INTRA+1;modei<OC_NMODES;modei++){
-          bits[modei]+=OC_RES_BITRATES[qi][pli][modei][
-           OC_MINI(err[modei][mapi]>>6,15)];
-        }
-      }
-      /*Bit costs are stored in the table with extra precision.
-        Round them down to whole bits here.*/
-      for(modei=0;modei<OC_NMODES;modei++){
-        bits[modei]=bits[modei]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
-      }
-      /*Estimate the cost of coding the label for each mode.
-        See comments at oc_mode_scheme_chooser_cost() for a description of the
-         method.*/
-      for(modei=0;modei<OC_NMODES;modei++){
-        bits[modei]+=oc_mode_scheme_chooser_cost(&_enc->mode_scheme_chooser,
-         modei);
-      }
-      /*Add the motion vector bits for each mode that requires them.*/
-      mbpmvbitsa=oc_mvbitsa(mbinfo->mvs[0][OC_FRAME_PREV][0],
-       mbinfo->mvs[0][OC_FRAME_PREV][1]);
-      mbgmvbitsa=oc_mvbitsa(mbinfo->mvs[1][OC_FRAME_GOLD][0],
-       mbinfo->mvs[0][OC_FRAME_GOLD][1]);
-      mb4mvbitsa=mb4mvbitsb=0;
-      for(codedi=0;codedi<ncoded_luma;codedi++){
-        mb4mvbitsa=oc_mvbitsa(bmvs[0][coded[codedi]][0],
-         bmvs[0][coded[codedi]][1]);
-        mb4mvbitsb+=12;
-      }
-      /*We use the same opportunity cost method of estimating the cost of
-         coding the motion vectors with the two different schemes as we do for
-         estimating the cost of the mode labels.
-        However, because there are only two schemes and they're both pretty
-         simple, this can just be done inline.*/
-      bits[OC_MODE_INTER_MV]+=OC_MINI(mvbitsa+mbpmvbitsa,mvbitsb+12)-
-       OC_MINI(mvbitsa,mvbitsb);
-      bits[OC_MODE_GOLDEN_MV]+=OC_MINI(mvbitsa+mbgmvbitsa,mvbitsb+12)-
-       OC_MINI(mvbitsa,mvbitsb);
-      bits[OC_MODE_INTER_MV_FOUR]+=OC_MINI(mvbitsa+mb4mvbitsa,
-       mvbitsb+mb4mvbitsb)-OC_MINI(mvbitsa,mvbitsb);
-      /*Finally, pick the mode with the cheapest estimated bit cost.*/
-      mb->mode=0;
-      for(modei=1;modei<OC_NMODES;modei++)if(bits[modei]<bits[mb->mode]){
-        /*Do not select 4MV mode when not all the luma blocks are coded when
-           we're in VP3 compatibility mode.*/
-        if(_enc->vp3_compatible&&modei==OC_MODE_INTER_MV_FOUR&&ncoded_luma<4){
-          continue;
-        }
-        mb->mode=modei;
-      }
-#if defined(OC_BITRATE_STATS)
-      /*Remember the error for the mode we selected in each fragment.*/
-      for(codedi=0;codedi<ncoded;codedi++){
-        mapi=coded[codedi];
-        fragi=mb->map[mapi>>2][mapi&3];
-        efrag=_enc->frinfo+fragi;
-        efrag->eerror=err[mb->mode][mapi];
-      }
-#endif
-      /*Go back and store the selected qi index corresponding to the selected
-         mode in each fragment.*/
-      for(codedi=0;codedi<ncoded;codedi++){
-        mapi=coded[codedi];
-        fragi=mb->map[mapi>>2][mapi&3];
-        frag=_enc->state.frags+fragi;
-        efrag=_enc->frinfo+fragi;
-        efrag->qii=(unsigned char)
-         frag_qii[codedi][OC_INTER_FRAME][mb->mode!=0];
-        frag->qi=_enc->qis[OC_INTER_FRAME][efrag->qii];
-      }
-      inter_bits+=bits[mb->mode];
-      intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
-      oc_mode_scheme_chooser_update(&_enc->mode_scheme_chooser,mb->mode);
-      switch(mb->mode){
-        case OC_MODE_INTER_MV:{
-          mvbitsa+=mbpmvbitsa;
-          mvbitsb+=12;
-          last_mv[1][0]=last_mv[0][0];
-          last_mv[1][1]=last_mv[0][1];
-          mbmv[0]=last_mv[0][0]=mbinfo->mvs[0][OC_FRAME_PREV][0];
-          mbmv[1]=last_mv[0][1]=mbinfo->mvs[0][OC_FRAME_PREV][1];
-        }break;
-        case OC_MODE_INTER_MV_LAST:{
-          mbmv[0]=last_mv[0][0];
-          mbmv[1]=last_mv[0][1];
-        }break;
-        case OC_MODE_INTER_MV_LAST2:{
-          mbmv[0]=last_mv[1][0];
-          mbmv[1]=last_mv[1][1];
-          last_mv[1][0]=last_mv[0][0];
-          last_mv[1][1]=last_mv[0][1];
-          last_mv[0][0]=mbmv[0];
-          last_mv[0][1]=mbmv[1];
-        }break;
-        case OC_MODE_INTER_MV_FOUR:{
-          mvbitsa+=mb4mvbitsa;
-          mvbitsb+=mb4mvbitsb;
-          if(ncoded_luma>0){
-            /*After 4MV mode, the last MV is the one from the last coded luma
-               block.*/
-            last_mv[1][0]=last_mv[0][0];
-            last_mv[1][1]=last_mv[0][1];
-            last_mv[0][0]=bmvs[0][coded[ncoded_luma-1]][0];
-            last_mv[0][1]=bmvs[0][coded[ncoded_luma-1]][1];
-          }
-        }break;
-        case OC_MODE_GOLDEN_MV:{
-          mvbitsa+=mbgmvbitsa;
-          mvbitsb+=12;
-          mbmv[0]=mbinfo->mvs[0][OC_FRAME_GOLD][0];
-          mbmv[1]=mbinfo->mvs[0][OC_FRAME_GOLD][1];
-        }break;
-      }
-      if(OC_MODE_HAS_MV[mb->mode]){
-        /*Special case 4MV mode.
-          MVs are stored in bmvs.*/
-        if(mb->mode==OC_MODE_INTER_MV_FOUR){
-          for(codedi=0;codedi<ncoded;codedi++){
-            mapi=coded[codedi];
-            pli=mapi>>2;
-            bi=mapi&3;
-            fragi=mb->map[pli][bi];
-            frag=_enc->state.frags+fragi;
-            frag->mbmode=mb->mode;
-            frag->mv[0]=bmvs[!!pli][bi][0];
-            frag->mv[1]=bmvs[!!pli][bi][1];
-          }
-        }
-        /*For every other mode with a MV, it is stored in mbmv.*/
-        else{
-          for(codedi=0;codedi<ncoded;codedi++){
-            mapi=coded[codedi];
-            fragi=mb->map[mapi>>2][mapi&3];
-            frag=_enc->state.frags+fragi;
-            frag->mbmode=mb->mode;
-            frag->mv[0]=mbmv[0];
-            frag->mv[1]=mbmv[1];
-          }
-        }
-      }
-      /*For modes with no MV, ensure 0,0 is stored in each fragment.*/
-      else{
-        for(codedi=0;codedi<ncoded;codedi++){
-          mapi=coded[codedi];
-          fragi=mb->map[mapi>>2][mapi&3];
-          frag=_enc->state.frags+fragi;
-          frag->mbmode=mb->mode;
-          frag->mv[0]=frag->mv[1]=0;
-        }
-      }
-    }
-  }
-  /*Finally, compare the cost of an INTER frame and an INTRA frame.*/
-  if(mvbitsb<mvbitsa){
-    _enc->mv_scheme=1;
-    inter_bits+=mvbitsb;
-  }
-  else{
-    _enc->mv_scheme=0;
-    inter_bits+=mvbitsa;
-  }
-  inter_bits+=_enc->mode_scheme_chooser.scheme_bits[
-   _enc->mode_scheme_chooser.scheme_list[0]];
-  /*The easiest way to count the bits needed for coded/not coded fragments is
-     to code them.
-    We need to do this anyway, might as well do it now.*/
-  oggpackB_reset(&_enc->opb_coded_flags);
-  inter_bits+=oc_enc_partial_sb_flags_pack(_enc);
-  inter_bits+=oc_enc_coded_sb_flags_pack(_enc);
-  inter_bits+=oc_enc_coded_block_flags_pack(_enc);
-  /*Select the quantizer list for INTER frames.*/
-  _enc->state.nqis=_enc->nqis[OC_INTER_FRAME];
-  for(qii=0;qii<_enc->state.nqis;qii++){
-    _enc->state.qis[qii]=_enc->qis[OC_INTER_FRAME][qii];
-  }
-  if(intra_bits>inter_bits){
-    _enc->est_bits=inter_bits;
-    return OC_INTER_FRAME;
-  }
-  /*All INTRA mode is smaller, but we haven't counted up the cost of all the
-     not coded fragments we will now have to code.*/
-  uncoded_fragi_end=uncoded_fragi=_enc->state.uncoded_fragis;
-  for(pli=0;pli<3;pli++){
-    uncoded_fragi_end-=_enc->state.nuncoded_fragis[pli];
-    while(uncoded_fragi-->uncoded_fragi_end){
-      fragi=*uncoded_fragi;
-      frag=_enc->state.frags+fragi;
-      /*Assume a very small bit cost for invalid fragments.*/
-      if(frag->invalid)intra_bits+=OC_RES_BITRATES[0][pli][OC_MODE_INTRA][0];
-      else{
-        int eerror;
-        eerror=0;
-        efrag=_enc->frinfo+fragi;
-        for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
-#if defined(OC_BITRATE_STATS)
-        efrag->eerror=eerror;
-#endif
-        qi=_enc->qis[OC_INTRA_FRAME][0];
-        for(qii=1;qii<_enc->nqis[OC_INTRA_FRAME];qii++){
-          if(_enc->qis[OC_INTRA_FRAME][qii]<qi&&
-           efrag->qi_min[0]<=_enc->qis[OC_INTRA_FRAME][qii]){
-            qi=_enc->qis[OC_INTRA_FRAME][qii];
-          }
-        }
-        intra_bits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
-         OC_MINI(eerror>>8,15)];
-        /*If it turns out INTRA mode was more expensive, we're done.*/
-        if(intra_bits>inter_bits){
-          _enc->est_bits=inter_bits;
-          return OC_INTER_FRAME;
-        }
-      }
-    }
-  }
-  /*So, we've compared the full cost estimates, and INTRA is still better.
-    Code an INTRA frame instead.*/
-  oc_enc_mark_all_intra(_enc);
-  _enc->est_bits=intra_bits;
-  return OC_INTRA_FRAME;
-}
-
 /*Performs a motion-compensated fDCT for each fragment coded in a mode other
    than INTRA.*/
-static void oc_enc_do_inter_dcts(oc_enc_ctx *_enc){
+void oc_enc_do_inter_dcts(oc_enc_ctx *_enc){
   int *coded_fragi;
   int *coded_fragi_end;
   int  pli;
@@ -2355,170 +1321,7 @@
   }
 }
 
-static void oc_enc_mb_modes_pack(oc_enc_ctx *_enc){
-  const theora_huff_code *codes;
-  const int              *mode_ranks;
-  int                    *coded_mbi;
-  int                    *coded_mbi_end;
-  int                     scheme;
-  scheme=_enc->mode_scheme_chooser.scheme_list[0];
-  oggpackB_write(&_enc->opb,scheme,3);
-  if(scheme==0){
-    int ranks[8];
-    int mi;
-    /*The numbers associated with each mode in the stream are slightly
-       different than what we use in the source.
-      The lookup here converts between the two.*/
-    for(mi=0;mi<OC_NMODES;mi++){
-      ranks[OC_MODE_SCHEMES[6][mi]]=
-       _enc->mode_scheme_chooser.scheme0_ranks[mi];
-    }
-    for(mi=0;mi<OC_NMODES;mi++)oggpackB_write(&_enc->opb,ranks[mi],3);
-  }
-  codes=_enc->mode_scheme_chooser.mode_codes[scheme];
-  mode_ranks=_enc->mode_scheme_chooser.mode_ranks[scheme];
-  coded_mbi=_enc->state.coded_mbis;
-  coded_mbi_end=coded_mbi+_enc->state.ncoded_mbis;
-  for(;coded_mbi<coded_mbi_end;coded_mbi++){
-    const theora_huff_code *code;
-    oc_mb                  *mb;
-    mb=_enc->state.mbs+*coded_mbi;
-    code=codes+mode_ranks[mb->mode];
-    oggpackB_write(&_enc->opb,code->pattern,code->nbits);
-  }
-}
 
-static void oc_enc_mv_pack(oc_enc_ctx *_enc,int _dx,int _dy){
-  const theora_huff_code *code;
-  code=OC_MV_CODES[_enc->mv_scheme]+_dx+31;
-  oggpackB_write(&_enc->opb,code->pattern,code->nbits);
-  code=OC_MV_CODES[_enc->mv_scheme]+_dy+31;
-  oggpackB_write(&_enc->opb,code->pattern,code->nbits);
-}
-
-static void oc_enc_mvs_pack(oc_enc_ctx *_enc){
-  int *coded_mbi;
-  int *coded_mbi_end;
-  oggpackB_write(&_enc->opb,_enc->mv_scheme,1);
-  coded_mbi=_enc->state.coded_mbis;
-  coded_mbi_end=coded_mbi+_enc->state.ncoded_mbis;
-  for(;coded_mbi<coded_mbi_end;coded_mbi++){
-    oc_mb          *mb;
-    oc_mb_enc_info *mbinfo;
-    int             mbi;
-    mbi=*coded_mbi;
-    mb=_enc->state.mbs+mbi;
-    switch(mb->mode){
-      case OC_MODE_INTER_MV:
-      case OC_MODE_GOLDEN_MV:{
-        int which_frame;
-        which_frame=OC_FRAME_FOR_MODE[mb->mode];
-        mbinfo=_enc->mbinfo+mbi;
-        oc_enc_mv_pack(_enc,mbinfo->mvs[0][which_frame][0],
-         mbinfo->mvs[0][which_frame][1]);
-      }break;
-      case OC_MODE_INTER_MV_FOUR:{
-        int bi;
-        mbinfo=_enc->mbinfo+mbi;
-        for(bi=0;bi<4;bi++){
-          int fragi;
-          fragi=mb->map[0][bi];
-          if(fragi>=0&&_enc->state.frags[fragi].coded){
-            oc_enc_mv_pack(_enc,mbinfo->bmvs[bi][0],mbinfo->bmvs[bi][1]);
-          }
-        }
-      }break;
-    }
-  }
-}
-
-static int oc_enc_deltaframe(oc_enc_ctx *_enc){
-  oggpackB_reset(&_enc->opb);
-  oc_enc_mark_coded(_enc);
-  /*Only proceed if we have some coded blocks.
-    No coded blocks -> dropped frame -> 0 byte packet.*/
-  if(_enc->state.ncoded_fragis[0]!=0||
-   _enc->state.ncoded_fragis[1]!=0||
-   _enc->state.ncoded_fragis[2]!=0){
-    oc_enc_quant_sel_quality(_enc,0);
-    _enc->state.frame_type=oc_enc_choose_mbmodes(_enc);
-    if(_enc->state.frame_type==OC_INTER_FRAME)oc_enc_do_inter_dcts(_enc);
-    oc_enc_quant_dc(_enc);
-    oc_enc_residual_tokenize(_enc);
-    oc_enc_frame_header_pack(_enc);
-    if(_enc->state.frame_type==OC_INTER_FRAME){
-      oggpackB_writecopy(&_enc->opb,
-       oggpackB_get_buffer(&_enc->opb_coded_flags),
-       oggpackB_bits(&_enc->opb_coded_flags));
-      oc_enc_mb_modes_pack(_enc);
-      oc_enc_mvs_pack(_enc);
-    }
-    oc_enc_block_qis_pack(_enc);
-    /*Pack the quantized DCT coefficients.*/
-    oc_enc_residual_tokens_pack(_enc);
-  }
-  /*Success: Mark the packet as ready to be flushed.*/
-  _enc->packet_state=OC_PACKET_READY;
-  return 0;
-}
-
-
-static int oc_enc_init(oc_enc_ctx *_enc,const theora_info *_info){
-  int ret;
-  /*Initialize the shared encoder/decoder state.*/
-  ret=oc_state_init(&_enc->state,_info);
-  if(ret<0)return ret;
-  _enc->block_coded_flags=_ogg_calloc(_enc->state.nfrags,
-   sizeof(_enc->block_coded_flags[0]));
-  /*Initialize our packet buffers.*/
-  oggpackB_writeinit(&_enc->opb);
-  oggpackB_writeinit(&_enc->opb_coded_flags);
-  /*Allocate and initialize storage for encoder-specific fragment and macro
-     block storage, as well as DCT token storage.*/
-  _enc->frinfo=_ogg_calloc(_enc->state.nfrags,
-   sizeof(_enc->frinfo[0]));
-  _enc->mbinfo=_ogg_calloc(_enc->state.nmbs,sizeof(_enc->mbinfo[0]));
-  _enc->dct_tokens=(unsigned char **)oc_malloc_2d(64,
-   _enc->state.nfrags,sizeof(_enc->dct_tokens[0][0]));
-  _enc->extra_bits=(ogg_uint16_t **)oc_malloc_2d(64,
-   _enc->state.nfrags,sizeof(_enc->extra_bits[0][0]));
-  oc_enc_init_mbinfo(_enc);
-  /*Do one-time mode scheme chooser initialization.*/
-  oc_mode_scheme_chooser_init(&_enc->mode_scheme_chooser);
-  /*Set the maximum distance between key frames.*/
-  _enc->keyframe_frequency_force=1<<_enc->state.info.keyframe_granule_shift;
-  /*Map the qi to a multiple of JND values.*/
-  _enc->qscale=_info->quality>=63?0.5F:
-   1.5F*OC_POWF(2,0.0625F*(64-_info->quality));
-  /*Initialize the motion compensation, high-level importance map, and
-     low-level psychovisual model plug-ins.*/
-  _enc->mcenc=oc_mcenc_alloc(_enc);
-  _enc->impmap=oc_impmap_alloc(_enc);
-  _enc->psych=oc_psych_alloc(_enc);
-  /*Reset the packet-out state machine.*/
-  _enc->packet_state=OC_PACKET_INFO_HDR;
-  /*Mark us as not VP3-compatible.*/
-  _enc->vp3_compatible=0;
-  /*Set the Huffman codes and quantization parameters to the defaults.*/
-  memcpy(_enc->huff_codes,OC_VP31_HUFF_CODES,sizeof(_enc->huff_codes));
-  oc_enc_set_quant_params(_enc,NULL);
-  return 0;
-}
-
-static void oc_enc_clear(oc_enc_ctx *_enc){
-  oc_psych_free(_enc->psych);
-  oc_impmap_free(_enc->impmap);
-  oc_mcenc_free(_enc->mcenc);
-  oc_free_2d(_enc->extra_bits);
-  oc_free_2d(_enc->dct_tokens);
-  _ogg_free(_enc->mbinfo);
-  _ogg_free(_enc->frinfo);
-  _ogg_free(_enc->block_coded_flags);
-  oc_state_clear(&_enc->state);
-}
-
-
-
 theora_enc_ctx *theora_encode_alloc(const theora_info *_info){
   oc_enc_ctx *enc;
   if(_info==NULL)return NULL;
@@ -2571,7 +1374,7 @@
       }
       _enc->keyframe_frequency_force=OC_MINI(keyframe_frequency_force,
        1U<<_enc->state.info.keyframe_granule_shift);
-      (*(ogg_uint32_t *)_buf)=_enc->keyframe_frequency_force;
+      *(ogg_uint32_t *)_buf=_enc->keyframe_frequency_force;
       return 0;
     }break;
     case OC_ENCCTL_SET_VP3_COMPATIBLE:{
@@ -2591,27 +1394,57 @@
       /*If we have more than 4095 super blocks, VP3's RLE coding might
          overflow.
         We could overcome this by ensuring we flip the coded/not-coded flags on
-         at lease one super block in the frame, but we pick the simple solution
+         at least one super block in the frame, but we pick the simple solution
          of just marking the stream incompatible instead.
         It's unlikely the old VP3 codec would be able to decode streams at this
          resolution in real time in the first place.*/
        _enc->state.nsbs>4095){
         vp3_compatible=0;
       }
-      *((int *)_buf)=vp3_compatible;
+      *(int *)_buf=vp3_compatible;
       return 0;
     }break;
+    case OC_ENCCTL_GET_SPLEVEL_MAX:{
+      if(_enc==NULL||_buf==NULL)return OC_FAULT;
+      if(_buf_sz!=sizeof(int))return OC_EINVAL;
+      /*We can only manipulate speed in the context of a given encoding mode.
+        Ensure one is selected if the user has not already done so.*/
+      if(_enc->set_speed==NULL)oc_enc_enable_default_mode(_enc);
+      *(int *)_buf=_enc->speed_max;
+      return 0;
+    }break;
+    case OC_ENCCTL_SET_SPLEVEL:{
+      int speed;
+      if(_enc==NULL||_buf==NULL)return OC_FAULT;
+      if(_buf_sz!=sizeof(int))return OC_EINVAL;
+      speed=*(int *)_buf;
+      /*We can only manipulate speed in the context of a given encoding mode.
+        Ensure one is selected if the user has not already done so.*/
+      if(_enc->set_speed==NULL)oc_enc_enable_default_mode(_enc);
+      if(speed<0||speed>_enc->speed_max)return OC_EINVAL;
+      (*_enc->set_speed)(_enc,speed);
+      return 0;
+    }break;
+    case OC_ENCCTL_SETUP_VBR:{
+      if(_enc==NULL)return OC_FAULT;
+      if(_buf==NULL&&_buf_sz!=0||_buf!=NULL&&_buf_sz!=sizeof(theora_vbr_cfg)){
+        return OC_EINVAL;
+      }
+      if(_enc->vbr==NULL)_enc->vbr=oc_enc_vbr_alloc(_enc);
+      return oc_enc_vbr_enable(_enc->vbr,(theora_vbr_cfg *)_buf);
+    }break;
     default:return OC_IMPL;
   }
 }
 
 int theora_encode_ycbcr_in(theora_enc_ctx *_enc,theora_ycbcr_buffer _img){
   theora_ycbcr_buffer img;
+  int                 y_avail[3];
   int                 cwidth;
   int                 cheight;
   int                 ret;
   int                 rfi;
-  int                 mbi;
+  int                 pli;
   /*Step 1: validate parameters.*/
   if(_enc==NULL||_img==NULL)return OC_FAULT;
   if(_enc->packet_state==OC_PACKET_DONE)return OC_EINVAL;
@@ -2627,94 +1460,37 @@
   }
   /*Flip the input buffer upside down.*/
   oc_ycbcr_buffer_flip(img,_img);
-  /*Step 2: Update state.*/
+  /*Step 2: Update buffer state.*/
   if(_enc->state.ref_frame_idx[OC_FRAME_SELF]>=0){
-    /*Right now the reconstructed frame has only the coded blocks in it.
-      We either need to copy all the other blocks into it, or copy the
-       reconstructed blocks back into the previous frame, whichever is
-       faster.*/
+    _enc->state.ref_frame_idx[OC_FRAME_PREV]=
+     _enc->state.ref_frame_idx[OC_FRAME_SELF];
     if(_enc->state.frame_type==OC_INTRA_FRAME){
-      /*Intra frames always code all fragments, so there is nothing to copy.
-        The new frame becomes both the previous and gold reference frames.*/
+      /*The new frame becomes both the previous and gold reference frames.*/
       _enc->state.keyframe_num=_enc->state.curframe_num;
       _enc->state.ref_frame_idx[OC_FRAME_GOLD]=
-       _enc->state.ref_frame_idx[OC_FRAME_PREV]=
        _enc->state.ref_frame_idx[OC_FRAME_SELF];
     }
-    else{
-      int *plfragis[3];
-      int  ncoded;
-      int  nuncoded;
-      int  pli;
-      ncoded=_enc->state.ncoded_fragis[0]+_enc->state.ncoded_fragis[1]+
-       _enc->state.ncoded_fragis[2];
-      nuncoded=_enc->state.nfrags-ncoded;
-      if(ncoded<nuncoded&&
-       _enc->state.ref_frame_idx[OC_FRAME_PREV]!=
-       _enc->state.ref_frame_idx[OC_FRAME_GOLD]){
-        plfragis[0]=_enc->state.coded_fragis;
-        plfragis[1]=plfragis[0]+_enc->state.ncoded_fragis[0];
-        plfragis[2]=plfragis[1]+_enc->state.ncoded_fragis[1];
-        for(pli=0;pli<3;pli++){
-          oc_state_frag_copy(&_enc->state,plfragis[pli],
-           _enc->state.ncoded_fragis[pli],OC_FRAME_PREV,OC_FRAME_SELF,pli);
-        }
-        _enc->state.ref_frame_idx[OC_FRAME_SELF]=
-         _enc->state.ref_frame_idx[OC_FRAME_PREV];
-      }
-      else{
-        plfragis[0]=_enc->state.uncoded_fragis-_enc->state.nuncoded_fragis[0];
-        plfragis[1]=plfragis[0]-_enc->state.nuncoded_fragis[1];
-        plfragis[2]=plfragis[1]-_enc->state.nuncoded_fragis[2];
-        for(pli=0;pli<3;pli++){
-          oc_state_frag_copy(&_enc->state,plfragis[pli],
-           _enc->state.nuncoded_fragis[pli],OC_FRAME_SELF,OC_FRAME_PREV,pli);
-        }
-        _enc->state.ref_frame_idx[OC_FRAME_PREV]=
-         _enc->state.ref_frame_idx[OC_FRAME_SELF];
-      }
-    }
-    /*Filter block edges.*/
-    oc_state_loop_filter(&_enc->state,OC_FRAME_PREV);
-#if defined(OC_DUMP_IMAGES)
-    oc_state_dump_frame(&_enc->state,OC_FRAME_PREV,"rec");
-#endif
-    /*Fill in the borders from the reconstructed version of the last encoded
-       frame.*/
-    oc_state_borders_fill(&_enc->state,
-     _enc->state.ref_frame_idx[OC_FRAME_PREV]);
   }
+  /*If no encoding mode has been explicitly enabled by the application,
+     enable the default encoding mode with a default configuration.*/
+  else if(_enc->pipe==NULL)oc_enc_enable_default_mode(_enc);
   /*Select a free buffer to use for the reconstructed version of this frame.*/
   for(rfi=0;rfi==_enc->state.ref_frame_idx[OC_FRAME_GOLD]||
    rfi==_enc->state.ref_frame_idx[OC_FRAME_PREV];rfi++);
   _enc->state.ref_frame_idx[OC_FRAME_SELF]=rfi;
-  /*Move the motion vector predictors back a frame.*/
-  for(mbi=_enc->state.fplanes[0].nsbs<<2;mbi-->0;){
-    oc_mb_enc_info *emb;
-    emb=_enc->mbinfo+mbi;
-    memmove(emb->mvs+1,emb->mvs,2*sizeof(emb->mvs[0]));
-  }
   _enc->state.curframe_num++;
   /*Fill the fragment array with pointers into the user buffer.*/
   oc_state_fill_buffer_ptrs(&_enc->state,OC_FRAME_IO,img);
-  /*Step 3: Analyze motion.*/
-  oc_mcenc_analyze(_enc->mcenc);
-  /*Step 4: Compute importance map.*/
-  oc_impmap_fill(_enc->impmap,
-   _enc->state.info.fps_denominator/(float)_enc->state.info.fps_numerator);
-  /*Step 5: Compute coefficient tolerances.*/
-  oc_psych_scan(_enc->psych,0);
-  /*Step 6: Encode!*/
-  if(_enc->state.curframe_num==0||
-   _enc->state.curframe_num-_enc->state.keyframe_num>=
-   _enc->keyframe_frequency_force){
-    ret=oc_enc_keyframe(_enc);
-    if(ret<0)return ret;
-  }
-  else{
-    ret=oc_enc_deltaframe(_enc);
-    if(ret<0)return ret;
-  }
+  /*Reset the encoding pipeline.*/
+  ret=(*_enc->pipe->pipe_start)(_enc->pipe);
+  if(ret<0)return ret;
+  /*Push the image into the pipeline.*/
+  for(pli=0;pli<3;pli++)y_avail[pli]=_img[pli].height;
+  ret=(*_enc->pipe->pipe_proc)(_enc->pipe,y_avail);
+  if(ret<0)return ret;
+  /*Flush the results through.*/
+  ret=(*_enc->pipe->pipe_end)(_enc->pipe);
+  if(ret<0)return ret;
   /*Note: All buffer management, etc., that is done after a frame is encoded
      is delayed until the next frame is encoded.
     This allows for a future API that would let an encoding application
@@ -2730,6 +1506,11 @@
      (_enc->state.keyframe_num<<_enc->state.info.keyframe_granule_shift)+
      (_enc->state.curframe_num-_enc->state.keyframe_num);
   }
+#if defined(OC_DUMP_IMAGES)
+  /*This is done after the granpos update, because that's what it uses to name
+     the output file.*/
+  oc_state_dump_frame(&_enc->state,OC_FRAME_SELF,"rec");
+#endif
   return 0;
 }
 

Added: experimental/derf/theora-exp/lib/encvbr.c
===================================================================
--- experimental/derf/theora-exp/lib/encvbr.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/encvbr.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -0,0 +1,1416 @@
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ogg/ogg.h>
+#include "encvbr.h"
+#include "fdct.h"
+
+
+
+/*Returns the number of bits used by the given motion vector with the VLC
+   motion vector codes (as opposed to the CLC codes, which always use 12 bits).
+  _dx: The X component of the vector, in half-pel units.
+  _dy: The Y component of the vector, in half-pel units.
+  Return: The number of bits required to store the vector with the VLC codes.*/
+static int oc_mvbitsa(int _dx,int _dy){
+  return OC_MV_CODES[0][_dx+31].nbits+OC_MV_CODES[0][_dy+31].nbits;
+}
+
+
+
+/*Select the set of quantizers to use for the current frame for each possible
+   frame type (intra or inter).
+  This does not assign a quantizer to each fragment, as that depends on the
+   quantizer type used and thus is done during mode decision.*/
+static void oc_enc_vbr_quant_sel_quality(oc_enc_ctx *_enc,int _intra_only){
+  unsigned              qmax[2][3];
+  int                   qi_min[2];
+  int                   qi_max[2];
+  int                   fti;
+  int                   qti;
+  int                   pli;
+  int                   dc_qi[2];
+  qi_min[0]=_enc->vbr->cfg.kf_qi_min;
+  qi_min[1]=_enc->vbr->cfg.df_qi_min;
+  qi_max[0]=_enc->vbr->cfg.kf_qi_max;
+  qi_max[1]=_enc->vbr->cfg.df_qi_max;
+  /*The first quantizer value is used for DC coefficients.
+    Select one that allows us to meet our quality requirements.*/
+  for(qti=0;qti<1+!_intra_only;qti++)for(pli=0;pli<3;pli++){
+    qmax[qti][pli]=OC_MAXI(2U*_enc->vbr->dc_tol_mins[pli],
+     OC_DC_QUANT_MIN[qti]);
+  }
+  /*For intra frames...(containing just INTRA fragments)*/
+  for(dc_qi[0]=qi_min[0];dc_qi[0]<qi_max[0];dc_qi[0]++){
+    if(_enc->state.dequant_tables[0][0][dc_qi[0]][0]<=qmax[0][0]&&
+     _enc->state.dequant_tables[0][1][dc_qi[0]][0]<=qmax[0][1]&&
+     _enc->state.dequant_tables[0][2][dc_qi[0]][0]<=qmax[0][2]){
+      break;
+    }
+  }
+  /*For inter frames...(containing both INTER and INTRA fragments)*/
+  if(!_intra_only){
+    for(dc_qi[1]=OC_CLAMPI(qi_min[1],dc_qi[0],qi_max[1]);dc_qi[1]<qi_max[1];
+     dc_qi[1]++){
+      if(_enc->state.dequant_tables[1][0][dc_qi[1]][0]<=qmax[1][0]&&
+       _enc->state.dequant_tables[1][1][dc_qi[1]][0]<=qmax[1][1]&&
+       _enc->state.dequant_tables[1][2][dc_qi[1]][0]<=qmax[1][2]){
+        break;
+      }
+    }
+  }
+  /*Now we select a full qi list for each frame type.*/
+  for(fti=0;fti<1+!_intra_only;fti++){
+    oc_fragment_enc_info *efrag;
+    int                   ncoded_fragis;
+    int                   nqis[64];
+    int                   qi;
+    int                   qi0;
+    int                   qi1;
+    int                   qi2;
+    /*Here we count up the number of fragments that can use each qi value.
+      Unless we know this is an intra frame, we don't know what quantizer type
+       will be used for each fragment, so we just count both of them.*/
+    memset(nqis,0,sizeof(nqis));
+    if(fti){
+      int *coded_fragi;
+      int *coded_fragi_end;
+      coded_fragi=_enc->state.coded_fragis;
+      ncoded_fragis=_enc->state.ncoded_fragis[0]+
+       _enc->state.ncoded_fragis[1]+_enc->state.ncoded_fragis[2];
+      coded_fragi_end=coded_fragi+ncoded_fragis;
+      for(;coded_fragi<coded_fragi_end;coded_fragi++){
+        efrag=_enc->frinfo+*coded_fragi;
+        for(qti=0;qti<2;qti++)nqis[efrag->qi_min[qti]]++;
+      }
+    }
+    else{
+      oc_fragment_enc_info *efrag_end;
+      ncoded_fragis=_enc->state.nfrags;
+      efrag=_enc->frinfo;
+      efrag_end=efrag+ncoded_fragis;
+      for(;efrag<efrag_end;efrag++)nqis[efrag->qi_min[0]]++;
+    }
+    /*We'll now choose the qi values that divide the fragments into equally
+       sized groups, or as close as we can make it.
+      We account for the DC coefficients by adding an extra amount to the qi
+       value they require.
+      Since there are usually many more DC coefficients coded than any one AC
+       coefficient, we use 1/8 of the number of fragments, instead of 1/64.*/
+    nqis[dc_qi[fti]]+=(ncoded_fragis<<fti)+7>>3;
+    /*Convert this into a moment table (nqis[qi] becomes the sum over >=qi).*/
+    for(qi=63;qi-->0;)nqis[qi]+=nqis[qi+1];
+    /*If we have a lower limit on the QI range, promote any fragments with a
+       smaller QI, to ensure they're counted.*/
+    if(qi_min[fti]>0)nqis[qi_min[fti]]=nqis[0];
+    /*Select our first quantizer.*/
+    for(qi0=qi_max[fti]+1;qi0-->qi_min[fti]&&nqis[qi0]<=0;);
+    for(qi1=qi0-1;qi1>=qi_min[fti]&&nqis[qi1]<=nqis[qi0];qi1--);
+    /*Test to make sure there are even two unique quantizers.*/
+    if(qi1>=qi_min[fti]){
+      ogg_int64_t best_metric;
+      ogg_int64_t metric;
+      int         best_qi1;
+      int         best_qi2;
+      int         qii;
+      for(qi2=qi1-1;qi2>=qi_min[fti]&&nqis[qi2]<=nqis[qi1];qi2--);
+      /*Test to make sure there are three unique quantizers.*/
+      if(qi2>=0){
+        best_metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
+         (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
+        best_qi1=qi1;
+        best_qi2=qi2;
+        for(;nqis[qi1]<nqis[1];qi1--){
+          for(qi2=qi1-1;nqis[qi2]<nqis[0];qi2--){
+            metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
+             (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
+            if(metric>=best_metric){
+              best_qi1=qi1;
+              best_qi2=qi2;
+              best_metric=metric;
+            }
+          }
+        }
+        _enc->qis[fti][0]=qi0;
+        _enc->qis[fti][1]=best_qi1;
+        _enc->qis[fti][2]=best_qi2;
+        _enc->nqis[fti]=3;
+      }
+      else{
+        best_metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
+        best_qi1=qi1;
+        if(qi1>0)for(qi1--;nqis[qi1]<nqis[0];qi1--){
+          metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
+          if(metric>best_metric){
+            best_qi1=qi1;
+            best_metric=metric;
+          }
+        }
+        _enc->qis[fti][0]=qi0;
+        _enc->qis[fti][1]=best_qi1;
+        _enc->nqis[fti]=2;
+      }
+      /*Right now qis[0] is the largest.
+        We want to use the smallest that is still large enough for our DC
+         coefficients.*/
+      for(qii=1;qii<_enc->nqis[fti];qii++)if(_enc->qis[fti][qii]>=dc_qi[fti]){
+        qi0=_enc->qis[fti][0];
+        _enc->qis[fti][0]=_enc->qis[fti][qii];
+        _enc->qis[fti][qii]=qi0;
+      }
+    }
+    else{
+      _enc->qis[fti][0]=qi0;
+      _enc->nqis[fti]=1;
+    }
+    /*If we're in VP3 compatibility mode, just use the first quantizer.*/
+    if(_enc->vp3_compatible)_enc->nqis[fti]=1;
+  }
+}
+
+/*Mark all fragments as coded and in OC_MODE_INTRA.
+  This also selects a quantizer value for each fragment and builds up the
+   coded fragment list (in coded order) and clears the uncoded fragment list.
+  It does not update the coded macro block list, as that is not used when
+   coding INTRA frames.*/
+static void oc_enc_vbr_mark_all_intra(oc_enc_ctx *_enc){
+  oc_sb *sb;
+  oc_sb *sb_end;
+  int    pli;
+  int    qii;
+  int    ncoded_fragis;
+  int    prev_ncoded_fragis;
+  /*Select the quantizer list for INTRA frames.*/
+  _enc->state.nqis=_enc->nqis[OC_INTRA_FRAME];
+  for(qii=0;qii<_enc->state.nqis;qii++){
+    _enc->state.qis[qii]=_enc->qis[OC_INTRA_FRAME][qii];
+  }
+  prev_ncoded_fragis=ncoded_fragis=0;
+  sb=sb_end=_enc->state.sbs;
+  for(pli=0;pli<3;pli++){
+    const oc_fragment_plane *fplane;
+    fplane=_enc->state.fplanes+pli;
+    sb_end+=fplane->nsbs;
+    for(;sb<sb_end;sb++){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
+        int bi;
+        for(bi=0;bi<4;bi++)if(sb->map[quadi][bi]>=0){
+          oc_fragment_enc_info *efrag;
+          oc_fragment          *frag;
+          int                   fragi;
+          int                   best_qii;
+          fragi=sb->map[quadi][bi];
+          frag=_enc->state.frags+fragi;
+          frag->coded=1;
+          frag->mbmode=OC_MODE_INTRA;
+          efrag=_enc->frinfo+fragi;
+          best_qii=0;
+          for(qii=1;qii<_enc->state.nqis;qii++){
+            if(efrag->qi_min[0]<=_enc->state.qis[qii]&&
+             (_enc->state.qis[best_qii]<efrag->qi_min[0]||
+             _enc->state.qis[qii]<_enc->state.qis[best_qii])){
+              best_qii=qii;
+            }
+          }
+          efrag->qii=(unsigned char)best_qii;
+          frag->qi=_enc->state.qis[best_qii];
+          _enc->state.coded_fragis[ncoded_fragis++]=fragi;
+#if defined(OC_BITRATE_STATS)
+          /*Compute the error function used for intra mode fragments.
+            This function can only use information known at mode decision time, and
+             so excludes the DC component.
+            TODO: Separate this out somewhere more useful.*/
+          {
+            oc_fragment_enc_info *efrag;
+            int                   ci;
+            int                   eerror;
+            efrag=_enc->frinfo+fragi;
+            eerror=0;
+            for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
+            efrag->eerror=eerror;
+          }
+#endif
+        }
+      }
+    }
+    _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+    prev_ncoded_fragis=ncoded_fragis;
+    _enc->state.nuncoded_fragis[pli]=0;
+  }
+  _enc->ncoded_frags=ncoded_fragis;
+}
+
+
+
+/*Quantize and predict the DC coefficients.
+  This is done in a separate step because the prediction of DC coefficients
+   occurs in image order, not in the Hilbert-curve order, unlike the rest of
+   the encoding process.*/
+static void oc_enc_vbr_quant_dc(oc_enc_ctx *_enc){
+  oc_fragment_enc_info *efrag;
+  oc_fragment          *frag;
+  int                   pli;
+  frag=_enc->state.frags;
+  efrag=_enc->frinfo;
+  for(pli=0;pli<3;pli++){
+    oc_fragment_plane *fplane;
+    unsigned           fquant;
+    unsigned           iquant;
+    int                pred_last[3];
+    int                fragx;
+    int                fragy;
+    pred_last[OC_FRAME_GOLD]=0;
+    pred_last[OC_FRAME_PREV]=0;
+    pred_last[OC_FRAME_SELF]=0;
+    fplane=_enc->state.fplanes+pli;
+    for(fragy=0;fragy<fplane->nvfrags;fragy++){
+      for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++,efrag++){
+        int qc_pred;
+        int qc;
+        if(!frag->coded)continue;
+        qc_pred=oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
+        /*Fragments outside the displayable region must still be coded in key
+           frames.
+          To minimize wasted bits, just use the predicted DC value.
+          TODO: We might do a better job in the lower-left hand corner by
+           propagating over the DC value of the first actually coded fragment,
+           but for the moment this is not done.*/
+        if(frag->invalid)qc=0;
+        else{
+          int c;
+          int c_abs;
+          int qti;
+          /*We now center the DC coefficient range around the predicted value
+             and perform token bits optimization based on the HVS-determined
+             tolerance range.
+            For more details, see oc_enc_vbr_frag_quant_tokenize().*/
+          qti=frag->mbmode!=OC_MODE_INTRA;
+          iquant=_enc->state.dequant_tables[qti][pli][_enc->state.qis[0]][0];
+          c=efrag->dct_coeffs[0]-qc_pred*iquant;
+          c_abs=abs(c);
+          if(c_abs<=efrag->tols[0])qc=0;
+          else{
+            int qc_signed[2];
+            int qc_max;
+            int qc_min;
+            int qc_offs;
+            int c_sign;
+            int c_min;
+            int c_recon;
+            int cati;
+            fquant=_enc->enquant_tables[qti][pli][_enc->state.qis[0]][0];
+            qc_max=(ogg_int32_t)c_abs*fquant+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
+            c_sign=c<0;
+            c_recon=(qc_max-1)*iquant;
+            c_min=OC_MAXI(0,c_abs-efrag->tols[0]);
+            for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=iquant;
+            if(qc_min<3+OC_NDCT_VAL_CAT2_SIZE)qc=qc_min;
+            else{
+              qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
+              for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
+               cati++){
+                qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
+              }
+              qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
+            }
+            qc_signed[0]=qc;
+            qc_signed[1]=-qc;
+            qc=qc_signed[c_sign];
+          }
+        }
+        pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc=qc+qc_pred;
+        efrag->dct_coeffs[0]=(ogg_int16_t)qc;
+      }
+    }
+  }
+}
+
+/*Quantize and tokenize the given fragment.
+  _efrag:   The encoder information for the fragment to quantize.
+  _qcoeffs: The quantized coefficients, in zig-zag order.
+  _fquant:  The forward quantization matrix to use.
+  _iquant:  The inverse quantization matrix to use.
+  Return: The number of coefficients before any final zero run.*/
+static int oc_enc_vbr_frag_quant_tokenize(oc_enc_ctx *_enc,
+ oc_fragment_enc_info *_efrag,ogg_int16_t _qcoeffs[64],
+ const ogg_uint16_t _fquant[64],const ogg_uint16_t _iquant[64]){
+  int zzi;
+  int zrun;
+  int qc;
+  int qc_offs;
+  int c_sign;
+  int cati;
+  int tli;
+  /*The DC coefficient is already quantized (it had to be for DC prediction).
+    Here we just tokenize it.*/
+  if(_efrag->dct_coeffs[0]){
+    qc=abs(_efrag->dct_coeffs[0]);
+    c_sign=_efrag->dct_coeffs[0]<0;
+    switch(qc){
+      case 1:{
+        _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+         (unsigned char)(OC_ONE_TOKEN+c_sign);
+      }break;
+      case 2:{
+        _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+         (unsigned char)(OC_TWO_TOKEN+c_sign);
+      }break;
+      default:{
+        if(qc-3<OC_NDCT_VAL_CAT2_SIZE){
+          _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+           (unsigned char)(OC_DCT_VAL_CAT2+qc-3);
+          _enc->extra_bits[0][_enc->nextra_bits[0]++]=(ogg_uint16_t)c_sign;
+        }
+        else{
+          qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
+          for(cati=0;qc>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];cati++){
+            qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
+          }
+          _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+           (unsigned char)(OC_DCT_VAL_CAT3+cati);
+          _enc->extra_bits[0][_enc->nextra_bits[0]++]=
+           (ogg_uint16_t)((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
+        }
+      }
+    }
+    zrun=0;
+  }
+  else zrun=1;
+  /*Now we quantize and tokenize each AC coefficient.*/
+  for(zzi=1;zzi<64;zzi++){
+    int qc_signed[2];
+    int qc_max;
+    int qc_min;
+    int c_sign;
+    int c_abs;
+    int c_min;
+    int c_recon;
+    int ci;
+    ci=OC_FZIG_ZAG[zzi];
+    c_abs=abs(_efrag->dct_coeffs[ci]);
+    /*Best case: we can encode this as a zero.*/
+    if(c_abs<=_efrag->tols[ci]){
+      zrun++;
+      _qcoeffs[zzi]=0;
+    }
+    else{
+      c_sign=_efrag->dct_coeffs[ci]<0;
+      /*qc_max is the most accurate quantized value.
+        This is the largest possible (absolute) value we will use.*/
+      qc_max=(ogg_int32_t)c_abs*_fquant[ci]+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
+      /*qc_min is the smallest possible (by absolute value) quantized value
+         whose dequantized value is within the HVS-determined tolerance
+         range.*/
+      /*TODO: qc_min could be computed by a division (we do not want to allow
+         the rounding errors that are possible with the mul+shift quantization
+         used for qc_max), which would allow qc_max to be calculated only if
+         needed below.
+        Is this faster?
+        Who knows.*/
+      c_recon=(qc_max-1)*_iquant[ci];
+      c_min=c_abs-_efrag->tols[ci];
+      for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=_iquant[ci];
+      /*We now proceed to find a token that is as close to qc_max as possible,
+         but does not use any more bits than would be required for qc_min.
+        The general assumption we make is that encoding a value closer to 0
+         always uses fewer bits.
+        qc_min can still reach 0 here despite the test above, if the quantizer
+         value is larger than the tolerance (which can happen for very small
+         tolerances; the quantizer value has a minimum it cannot go below).*/
+      if(qc_min==0){
+        zrun++;
+        _qcoeffs[zzi]=0;
+      }
+      else{
+        /*If we have an outstanding zero run, code it now.*/
+        if(zrun>0){
+          /*The zero run tokens appear on the list for the first zero in the
+             run.*/
+          tli=zzi-zrun;
+          /*Second assumption: coding a combined run/value token always uses
+             fewer bits than coding them separately.*/
+          /*CAT1 run/value tokens: the value is 1.*/
+          if(qc_min==1&&zrun<=17){
+            if(zrun<=5){
+              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+               (unsigned char)(OC_DCT_RUN_CAT1A+(zrun-1));
+              _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+               (ogg_uint16_t)c_sign;
+            }
+            else if(zrun<=9){
+              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+               OC_DCT_RUN_CAT1B;
+              _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+               (ogg_uint16_t)((c_sign<<2)+zrun-6);
+            }
+            else{
+              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+               OC_DCT_RUN_CAT1C;
+              _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+               (ogg_uint16_t)((c_sign<<3)+zrun-10);
+            }
+            qc_signed[0]=1;
+            qc_signed[1]=-1;
+            _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+            zrun=0;
+            /*Skip coding the DCT value below.*/
+            continue;
+          }
+          /*CAT2 run/value tokens: the value is 2-3.*/
+          else if(qc_min<=3&&zrun<=3){
+            if(zrun==1){
+              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+               OC_DCT_RUN_CAT2A;
+              qc=OC_MINI(3,qc_max);
+              _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+               (ogg_uint16_t)((c_sign<<1)+qc-2);
+            }
+            else{
+              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+               OC_DCT_RUN_CAT2B;
+              qc=OC_MINI(3,qc_max);
+              _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+               (ogg_uint16_t)((c_sign<<2)+(qc-2<<1)+zrun-2);
+            }
+            qc_signed[0]=qc;
+            qc_signed[1]=-qc;
+            _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+            zrun=0;
+            /*Skip coding the DCT value below.*/
+            continue;
+          }
+          /*The run is too long or the quantized value too large: code them
+             separately.*/
+          else{
+            /*This is stupid: non-short ZRL tokens are never used for run
+               values less than 9, but codewords are reserved for them,
+               wasting bits.
+              Yes, yes, this would've meant a non-constant number of extra
+               bits for this token, but even so.*/
+            if(zrun<=8){
+              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+               OC_DCT_SHORT_ZRL_TOKEN;
+            }
+            else{
+              _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+               OC_DCT_ZRL_TOKEN;
+            }
+            _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+             (ogg_uint16_t)(zrun-1);
+            zrun=0;
+          }
+        }
+        /*No zero run, or the run and the qc value are being coded
+           separately.*/
+        switch(qc_min){
+          case 1:{
+            _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+             (unsigned char)(OC_ONE_TOKEN+c_sign);
+            _qcoeffs[zzi]=(ogg_int16_t)((-c_sign<<1)+1);
+          }break;
+          case 2:{
+            _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+             (unsigned char)(OC_TWO_TOKEN+c_sign);
+            _qcoeffs[zzi]=(ogg_int16_t)((-c_sign<<2)+2);
+          }break;
+          default:{
+            if(qc_min-3<OC_NDCT_VAL_CAT2_SIZE){
+              _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+               (unsigned char)(OC_DCT_VAL_CAT2+qc_min-3);
+              _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=
+               (ogg_uint16_t)c_sign;
+              qc_signed[0]=qc_min;
+              qc_signed[1]=-qc_min;
+              _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+            }
+            else{
+              qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
+              for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
+               cati++){
+                qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
+              }
+              /*qc_min can be encoded in this category.
+                Since all DCT values in the category use the same number of
+                 bits, we encode the closest value to qc_max.
+                This is either qc_max itself, if it is in the category's
+                 range, or the largest value in the category.*/
+              qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
+              qc_signed[0]=qc;
+              qc_signed[1]=-qc;
+              _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+              _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+               (unsigned char)(OC_DCT_VAL_CAT3+cati);
+              _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=(ogg_uint16_t)
+               ((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
+            }
+          }
+        }
+      }
+    }
+  }
+  /*If there's a trailing zero run, code an EOB token.*/
+  if(zrun>0){
+    int old_tok;
+    int toki;
+    int ebi;
+    tli=64-zrun;
+    toki=_enc->ndct_tokens[tli]-1;
+    if(toki>=0)old_tok=_enc->dct_tokens[tli][toki];
+    else old_tok=-1;
+    /*Try to extend an EOB run.*/
+    switch(old_tok){
+      case OC_DCT_EOB1_TOKEN:
+      case OC_DCT_EOB2_TOKEN:{
+        _enc->dct_tokens[tli][toki]++;
+      }break;
+      case OC_DCT_EOB3_TOKEN:{
+        _enc->dct_tokens[tli][toki]++;
+        _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=0;
+      }break;
+      case OC_DCT_REPEAT_RUN0_TOKEN:{
+        ebi=_enc->nextra_bits[tli]-1;
+        if(_enc->extra_bits[tli][ebi]<3)_enc->extra_bits[tli][ebi]++;
+        else{
+          _enc->dct_tokens[tli][toki]++;
+          _enc->extra_bits[tli][ebi]=0;
+        }
+      }break;
+      case OC_DCT_REPEAT_RUN1_TOKEN:{
+        ebi=_enc->nextra_bits[tli]-1;
+        if(_enc->extra_bits[tli][ebi]<7)_enc->extra_bits[tli][ebi]++;
+        else{
+          _enc->dct_tokens[tli][toki]++;
+          _enc->extra_bits[tli][ebi]=0;
+        }
+      }break;
+      case OC_DCT_REPEAT_RUN2_TOKEN:{
+        ebi=_enc->nextra_bits[tli]-1;
+        if(_enc->extra_bits[tli][ebi]<15)_enc->extra_bits[tli][ebi]++;
+        else{
+          _enc->dct_tokens[tli][toki]++;
+          /*Again stupid: we could encode runs up to 4127, but inexplicably
+             they don't subtract the bottom of the range here, so we can only
+             go to 4095 (unless we want to change the spec to deal with
+             wrap-around).*/
+          _enc->extra_bits[tli][ebi]=32;
+        }
+      }break;
+      case OC_DCT_REPEAT_RUN3_TOKEN:{
+        ebi=_enc->nextra_bits[tli]-1;
+        if(_enc->extra_bits[tli][ebi]<4095){
+          _enc->extra_bits[tli][ebi]++;
+          break;
+        }
+        /*else fall through.*/
+      }
+      /*Start a new EOB run.*/
+      default:{
+        _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=OC_DCT_EOB1_TOKEN;
+      }
+    }
+  }
+  /*Return the number of coefficients before the final zero run.*/
+  return 64-zrun;
+}
+
+/*Tokenizes the residual of every coded fragment in the frame.
+  Each coded fragment is quantized and tokenized by
+   oc_enc_vbr_frag_quant_tokenize(), then immediately reconstructed with
+   oc_state_frag_recon().
+  Once all fragments are done, an EOB run that starts one coefficient's token
+   list is folded into an EOB run that ends the closest earlier list that
+   still has tokens, whenever the combined run is shorter than 4096.
+  _enc: The encoding context.*/
+static void oc_enc_vbr_residual_tokenize(oc_enc_ctx *_enc){
+  int *coded_fragi;
+  int *coded_fragi_end;
+  int    pli;
+  int    zzi;
+  /*Clear any existing DCT tokens.*/
+  for(zzi=0;zzi<64;zzi++){
+    _enc->ndct_tokens[zzi]=_enc->nextra_bits[zzi]=0;
+    _enc->extra_bits_offs[zzi]=0;
+  }
+  /*The coded fragment list is consumed one plane at a time: coded_fragi_end
+     is advanced by each plane's coded fragment count.*/
+  coded_fragi_end=coded_fragi=_enc->state.coded_fragis;
+  for(pli=0;pli<3;pli++){
+    /*Remember how many tokens each coefficient list held when this plane
+       started.*/
+    memcpy(_enc->dct_token_offs[pli],_enc->ndct_tokens,
+     sizeof(_enc->dct_token_offs[pli]));
+    coded_fragi_end+=_enc->state.ncoded_fragis[pli];
+    for(;coded_fragi<coded_fragi_end;coded_fragi++){
+      oc_quant_table       *iquants;
+      oc_fragment          *frag;
+      oc_fragment_enc_info *efrag;
+      ogg_int16_t           qcoeffs[64];
+      int                   fragi;
+      int                   qti;
+      int                   nnzc;
+      fragi=*coded_fragi;
+      frag=_enc->state.frags+fragi;
+      efrag=_enc->frinfo+fragi;
+      /*The quantizer type index: 0 for INTRA fragments, 1 for all others.*/
+      qti=frag->mbmode!=OC_MODE_INTRA;
+      iquants=_enc->state.dequant_tables[qti][pli];
+      /*The return value is used below as the coefficient count handed to the
+         reconstruction.*/
+      nnzc=oc_enc_vbr_frag_quant_tokenize(_enc,efrag,qcoeffs,
+       _enc->enquant_tables[qti][pli][frag->qi],iquants[frag->qi]);
+      /*While we're here and things are in cache, reconstruct the quantized
+         fragment.*/
+      oc_state_frag_recon(&_enc->state,frag,pli,qcoeffs,nnzc,nnzc,
+       iquants[_enc->state.qis[0]][0],iquants[frag->qi]);
+    }
+  }
+  /*Merge the final EOB run of one coefficient list with the start of the
+     next, if possible.*/
+  for(zzi=1;zzi<64;zzi++){
+    /*The number of distinct run lengths each EOB token can represent.*/
+    static const int OC_EOB_RANGE[OC_NDCT_EOB_TOKEN_MAX]={1,1,1,4,8,16,4096};
+    /*The value added to the extra bits to obtain the run length of each EOB
+       token.
+      The last token has no offset, so its extra bits hold the whole run.*/
+    static const int OC_EOB_OFFS[OC_NDCT_EOB_TOKEN_MAX]={1,2,3,4,8,16,0};
+    int old_tok1;
+    int old_tok2;
+    int old_eb1;
+    int old_eb2;
+    int new_tok;
+    int toki;
+    int zzj;
+    int ebi;
+    int runl;
+    /*Make sure this coefficient has tokens at all.*/
+    if(_enc->ndct_tokens[zzi]<=0)continue;
+    /*Ensure the first token is an EOB run.*/
+    old_tok2=_enc->dct_tokens[zzi][0];
+    if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue;
+    /*Search for a previous coefficient that has any tokens at all.
+      A list whose only token was already merged away (its plane 0 offset was
+       advanced past it) does not count.*/
+    old_tok1=OC_NDCT_EOB_TOKEN_MAX;
+    zzj=zzi-1;
+    do{
+      toki=_enc->ndct_tokens[zzj]-1;
+      if(toki>=_enc->dct_token_offs[0][zzj]){
+        old_tok1=_enc->dct_tokens[zzj][toki];
+        break;
+      }
+    }
+    while(zzj-->0);
+    /*Ensure its last token was an EOB run.
+      This also rejects the case where no earlier list was found, since
+       old_tok1 then still holds its initial out-of-range value.*/
+    if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue;
+    /*Pull off the associated extra bits, if any, and decode the runs.*/
+    ebi=_enc->nextra_bits[zzj];
+    old_eb1=OC_DCT_TOKEN_EXTRA_BITS[old_tok1]?_enc->extra_bits[zzj][--ebi]:0;
+    old_eb2=OC_DCT_TOKEN_EXTRA_BITS[old_tok2]?_enc->extra_bits[zzi][0]:0;
+    runl=OC_EOB_OFFS[old_tok1]+old_eb1+OC_EOB_OFFS[old_tok2]+old_eb2;
+    /*We can't possibly combine these into one run.
+      It might be possible to split them more optimally, but we'll just leave
+       them as is.*/
+    if(runl>=4096)continue;
+    /*We CAN combine them into one run.
+      Pick the first EOB token whose range covers the combined length.*/
+    for(new_tok=OC_DCT_EOB1_TOKEN;
+     runl-OC_EOB_OFFS[new_tok]>=OC_EOB_RANGE[new_tok];new_tok++);
+    /*toki is always initialized.
+      If your compiler thinks otherwise, it is dumb.*/
+    _enc->dct_tokens[zzj][toki]=(unsigned char)new_tok;
+    /*Update the two token lists.
+      ebi was already stepped back over the old extra bits of list zzj, if it
+       had any, so the new value overwrites them.*/
+    if(OC_DCT_TOKEN_EXTRA_BITS[new_tok]){
+      _enc->extra_bits[zzj][ebi++]=(ogg_uint16_t)(
+       runl-OC_EOB_OFFS[new_tok]);
+    }
+    _enc->nextra_bits[zzj]=ebi;
+    /*Step the plane 0 offset of list zzi past its first token, which is now
+       part of the merged run.*/
+    _enc->dct_token_offs[0][zzi]++;
+    /*Note: We don't bother to update the offsets for planes 1 and 2 if
+       planes 0 or 1 don't have any tokens.
+      This turns out not to matter due to the way we use the offsets later.*/
+    if(OC_DCT_TOKEN_EXTRA_BITS[old_tok2])_enc->extra_bits_offs[zzi]++;
+  }
+}
+
+/*Marks each fragment as coded or not, based on the coefficient-level
+   thresholds computed in the psychovisual stage.
+  The MB mode of the fragments is not set, as it will be computed in
+   oc_enc_choose_mbmodes().
+  This also builds up the coded fragment and uncoded fragment lists, the
+   per-plane counts for both, and the list of block-level coded flags for
+   partially coded super blocks.
+  The coded MB list is not built up.
+  That is done during mode decision.
+  _enc: The encoding context.*/
+static void oc_enc_mark_coded(oc_enc_ctx *_enc){
+  oc_sb *sb;
+  oc_sb *sb_end;
+  int    pli;
+  int    bli;
+  int    ncoded_fragis;
+  int    prev_ncoded_fragis;
+  int    nuncoded_fragis;
+  int    prev_nuncoded_fragis;
+  _enc->nblock_coded_flags=bli=0;
+  prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0;
+  /*The super blocks are walked one plane at a time: sb_end is advanced by
+     each plane's super block count.*/
+  sb=sb_end=_enc->state.sbs;
+  for(pli=0;pli<3;pli++){
+    const oc_fragment_plane *fplane;
+    int                      ystride;
+    int                      prev_refi;
+    fplane=_enc->state.fplanes+pli;
+    sb_end+=fplane->nsbs;
+    prev_refi=_enc->state.ref_frame_idx[OC_FRAME_PREV];
+    ystride=_enc->state.ref_frame_bufs[prev_refi][pli].ystride;
+    for(;sb<sb_end;sb++){
+      int quadi;
+      /*Start from the neutral values for the AND/OR accumulation of the
+         fragment flags below.*/
+      sb->coded_fully=1;
+      sb->coded_partially=0;
+      for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
+        int bi;
+        for(bi=0;bi<4;bi++){
+          int fragi;
+          fragi=sb->map[quadi][bi];
+          if(fragi>=0){
+            oc_fragment *frag;
+            int          flag;
+            frag=_enc->state.frags+fragi;
+            if(frag->invalid){
+              frag->coded=0;
+              /*The uncoded list is filled backwards, starting just below
+                 state.uncoded_fragis.*/
+              *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
+            }
+            else{
+              oc_fragment_enc_info *efrag;
+              ogg_int16_t           dct_buf[64];
+              int                   ci;
+              /*Check to see if the fragment can be skipped.
+                It is assumed that a skipped fragment always takes fewer bits
+                 than a coded fragment, though this may not necessarily be true.
+                A single skipped fragment could take up to 34 bits to encode
+                 its location in the RLE scheme Theora uses */
+              /*NOTE(review): presumably this transforms the co-located block
+                 of the previous reference frame, so that the test below
+                 compares it against the current frame's coefficients in
+                 efrag->dct_coeffs; confirm against oc_frag_intra_fdct().*/
+              oc_frag_intra_fdct(frag,dct_buf,ystride,prev_refi);
+              efrag=_enc->frinfo+fragi;
+              /*The comparison against OC_DC_QUANT_MIN and OC_AC_QUANT_MIN
+                 ensures we mark a fragment as skipped if it would be quantized
+                 to all zeros in OC_MODE_INTER_NOMV.
+                These minimum quantizers represent the maximum quality the
+                 format is capable of, and can be larger than our tolerances.
+                The minimum for INTER modes is twice the minimum for INTRA
+                 modes, so technically if the tolerances are below this
+                 threshold, we might be able to do a better job representing
+                 this fragment by coding it in INTRA mode.
+                But the number of extra bits required to do that would be
+                 ridiculous, so we give up our devotion to minimum quality just
+                 this once.
+
+                Note: OC_DC_QUANT_MIN[0] should actually be
+                 OC_DC_QUANT_MIN[1]>>1, but in this case those are
+                 equivalent.*/
+              /*ci only reaches 64 if the DC difference and every AC
+                 difference are within tolerance.*/
+              ci=0;
+              if((unsigned)abs(dct_buf[0]-efrag->dct_coeffs[0])<=
+               OC_MAXI(efrag->tols[0],OC_DC_QUANT_MIN[0])){
+                for(ci++;ci<64;ci++){
+                  if((unsigned)abs(dct_buf[ci]-efrag->dct_coeffs[ci])>
+                   OC_MAXI(efrag->tols[ci],OC_AC_QUANT_MIN[0])){
+                    break;
+                  }
+                }
+              }
+              if(ci>=64){
+                frag->coded=0;
+                *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
+              }
+              else{
+                frag->coded=1;
+                _enc->state.coded_fragis[ncoded_fragis++]=fragi;
+              }
+            }
+            /*Fold this fragment's flag into the super block summary, and
+               tentatively append it to the block-level flag list.*/
+            flag=frag->coded;
+            sb->coded_fully&=flag;
+            sb->coded_partially|=flag;
+            _enc->block_coded_flags[bli++]=(char)flag;
+          }
+        }
+      }
+      /*If this is a partially coded super block, keep the entries just added
+         to the code block flag list.*/
+      if(!sb->coded_fully&&sb->coded_partially){
+        _enc->nblock_coded_flags=bli;
+      }
+      /*Otherwise, discard these entries from the list, as they are
+         implicit.*/
+      else{
+        sb->coded_partially=0;
+        bli=_enc->nblock_coded_flags;
+      }
+    }
+    /*Record how many fragments this plane contributed to each list.*/
+    _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+    prev_ncoded_fragis=ncoded_fragis;
+    _enc->state.nuncoded_fragis[pli]=nuncoded_fragis-prev_nuncoded_fragis;
+    prev_nuncoded_fragis=nuncoded_fragis;
+  }
+  _enc->ncoded_frags=ncoded_fragis;
+}
+
+/*Selects an appropriate coding mode for each macro block.
+  A mode is chosen for the macro blocks with at least one coded fragment.
+  A bit cost estimate for coding the frame with the selected modes is made,
+   and a similar estimate is made for coding the frame as a key frame.
+  These estimates are used to select the optimal frame type.
+  The estimate for the chosen frame type is stored in _enc->vbr->est_bits, and
+   the coded/not coded flags are packed into _enc->opb_coded_flags along the
+   way.
+  _enc: The encoding context.
+  Return: The frame type to encode with: OC_INTER_FRAME or OC_INTRA_FRAME.*/
+static int oc_enc_choose_mbmodes(oc_enc_ctx *_enc){
+  oc_set_chroma_mvs_func  set_chroma_mvs;
+  oc_fragment_enc_info   *efrag;
+  oc_fragment            *frag;
+  oc_mb                  *mb;
+  oc_mb_enc_info         *mbinfo;
+  char                    last_mv[2][2];
+  int                    *uncoded_fragi;
+  int                    *uncoded_fragi_end;
+  int                     best_qii;
+  int                     qii;
+  int                     qi;
+  int                     pli;
+  int                     mbi;
+  int                     fragi;
+  int                     ci;
+  int                     nmbs;
+  int                     mvbitsa;
+  int                     mvbitsb;
+  int                     intra_bits;
+  int                     inter_bits;
+  nmbs=_enc->state.nmbs;
+  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
+  oc_mode_scheme_chooser_reset(&_enc->mode_scheme_chooser);
+  memset(last_mv,0,sizeof(last_mv));
+  mbinfo=_enc->mbinfo;
+  /*Running totals of the MV bits spent so far under each of the two MV coding
+     schemes.*/
+  mvbitsa=mvbitsb=0;
+  inter_bits=2+7*_enc->state.nqis-(_enc->state.nqis==3);
+  intra_bits=inter_bits+3;
+  _enc->state.ncoded_mbis=0;
+  for(mbi=0;mbi<nmbs;mbi++){
+    mb=_enc->state.mbs+mbi;
+    if(mb->mode!=OC_MODE_INVALID){
+      oc_fragment_enc_info *efrag;
+      char                  bmvs[2][4][2];
+      char                  mbmv[2];
+      int                   err[OC_NMODES][12];
+      int                   bits[OC_NMODES];
+      int                   coded[13];
+      int                   frag_qii[12][2][2];
+      int                   ncoded;
+      int                   ncoded_luma;
+      int                   mapii;
+      int                   mapi;
+      int                   modei;
+      int                   codedi;
+      int                   mbintrabits;
+      int                   mbpmvbitsa;
+      int                   mbgmvbitsa;
+      int                   mb4mvbitsa;
+      int                   mb4mvbitsb;
+      int                   fti;
+      int                   qti;
+      int                   bi;
+      mbinfo=_enc->mbinfo+mbi;
+      /*Build up a list of coded fragments.*/
+      ncoded=0;
+      for(mapii=0;mapii<OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];mapii++){
+        mapi=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt][mapii];
+        fragi=mb->map[mapi>>2][mapi&3];
+        if(fragi>=0&&_enc->state.frags[fragi].coded)coded[ncoded++]=mapi;
+      }
+      /*If we don't find any, mark this MB not coded and move on.*/
+      if(ncoded<=0){
+        mb->mode=OC_MODE_NOT_CODED;
+        /*Don't bother to do a MV search against the golden frame.
+          Just re-use the last vector, which should match well since the
+           contents of the MB haven't changed much.*/
+        mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[1][OC_FRAME_GOLD][0];
+        mbinfo->mvs[0][OC_FRAME_GOLD][1]=mbinfo->mvs[1][OC_FRAME_GOLD][1];
+        continue;
+      }
+      /*Count the number of coded blocks that are luma blocks, and replace the
+         block MVs for not-coded blocks with (0,0).*/
+      memcpy(bmvs[0],mbinfo->bmvs,sizeof(bmvs[0]));
+      /*Mark the end of the list so we don't go past it below.*/
+      coded[ncoded]=-1;
+      for(mapi=ncoded_luma=0;mapi<4;mapi++){
+        if(coded[ncoded_luma]==mapi)ncoded_luma++;
+        else bmvs[0][mapi][0]=bmvs[0][mapi][1]=0;
+      }
+      /*Select a qi value for each coded fragment for each frame type and
+         quantizer type.*/
+      for(codedi=0;codedi<ncoded;codedi++){
+        mapi=coded[codedi];
+        efrag=_enc->frinfo+mb->map[mapi>>2][mapi&3];
+        for(fti=0;fti<2;fti++)for(qti=0;qti<=fti;qti++){
+          best_qii=0;
+          for(qii=1;qii<_enc->nqis[fti];qii++){
+            if(efrag->qi_min[qti]<=_enc->qis[fti][qii]&&
+             (_enc->qis[fti][qii]<_enc->qis[fti][best_qii]||
+             _enc->qis[fti][best_qii]<efrag->qi_min[qti])){
+              best_qii=qii;
+            }
+          }
+          frag_qii[codedi][fti][qti]=best_qii;
+        }
+      }
+      /*Special case: If no luma blocks are coded, but some chroma blocks are,
+         then the macro block defaults to OC_MODE_INTER_NOMV, and no mode need
+         be explicitly coded for it.*/
+      if(ncoded_luma<=0){
+        mb->mode=OC_MODE_NOT_CODED;
+        /*Don't bother to do a MV search against the golden frame.*/
+        mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[0][OC_FRAME_GOLD][1]=0;
+        /*We do collect bitrate stats for frame type decision.*/
+        mbintrabits=bits[OC_MODE_INTER_NOMV]=0;
+        for(codedi=0;codedi<ncoded;codedi++){
+          mapi=coded[codedi];
+          pli=mapi>>2;
+          fragi=mb->map[pli][mapi&3];
+          frag=_enc->state.frags+fragi;
+          efrag=_enc->frinfo+fragi;
+          /*Set the MB mode and MV in the fragment.*/
+          frag->mbmode=OC_MODE_INTER_NOMV;
+          frag->mv[0]=frag->mv[1]=0;
+          /*Calculate the bitrate estimates.*/
+          err[OC_MODE_INTRA][mapi]=0;
+          for(ci=1;ci<64;ci++){
+            err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
+          }
+          err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
+           OC_FRAME_PREV);
+          qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
+          mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+           OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
+          qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
+          bits[OC_MODE_INTER_NOMV]+=OC_RES_BITRATES[qi][pli][
+           OC_MODE_INTER_NOMV][OC_MINI(err[OC_MODE_INTER_NOMV][mapi]>>6,15)];
+          /*Also mark this fragment with the selected INTER qi.
+            It will be reset if we eventually code this as an INTRA frame.*/
+#if defined(OC_BITRATE_STATS)
+          efrag->eerror=err[OC_MODE_INTER_NOMV][mapi];
+#endif
+          efrag->qii=(unsigned char)frag_qii[codedi][OC_INTER_FRAME][1];
+          frag->qi=qi;
+        }
+        intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+        inter_bits+=bits[OC_MODE_INTER_NOMV]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+        continue;
+      }
+      /*Otherwise, add this to the coded MB list.*/
+      _enc->state.coded_mbis[_enc->state.ncoded_mbis++]=mbi;
+      /*Compute the chroma MVs for the 4MV mode.*/
+      (*set_chroma_mvs)(bmvs[1],bmvs[0]);
+      /*Do a MV search against the golden frame.*/
+      oc_mcenc_search_1mv(_enc->mcenc,mb-_enc->state.mbs,OC_FRAME_GOLD);
+      /*We are now ready to do mode decision for this macro block.
+        Mode decision is done by exhaustively examining all potential choices.
+        Since we use a minimum-quality encoding strategy, this amounts to
+         simply selecting the mode which uses the smallest number of bits,
+         since the minimum quality will be met in any mode.
+        Obviously, doing the motion compensation, fDCT, tokenization, and then
+         counting the bits each token uses is computationally expensive.
+        Theora's EOB runs can also split the cost of these tokens across
+         multiple fragments, and naturally we don't know what the optimal
+         choice of Huffman codes will be until we know all the tokens we're
+         going to encode in all the fragments.
+
+        So we use a simple approach to estimating the bit cost of each mode
+         based upon the SAD value of the residual.
+        The mathematics behind the technique are outlined by Kim \cite{Kim03},
+         but the process is very simple.
+        For each quality index and SAD value, we have a table containing the
+         average number of bits needed to code a fragment.
+        The SAD values are placed into a small number of bins (currently 16).
+        The bit counts are obtained by examining actual encoded frames, with
+         optimal Huffman codes selected and EOB bits appropriately divided
+         among all the blocks they involve.
+        A separate QIxSAD table is kept for each mode and color plane.
+        It may be possible to combine many of these, but only experimentation
+         will tell which ones truly represent the same distribution.
+
+        @ARTICLE{Kim03,
+          author="Hyun Mun Kim",
+          title="Adaptive Rate Control Using Nonlinear Regression",
+          journal="IEEE Transactions on Circuits and Systems for Video
+           Technology",
+          volume=13,
+          number=5,
+          pages="432--439",
+          month="May",
+          year=2003
+        }*/
+      memset(bits,0,sizeof(bits));
+      mbintrabits=0;
+      /*Find the SAD values for each coded fragment for each possible mode.*/
+      for(codedi=0;codedi<ncoded;codedi++){
+        mapi=coded[codedi];
+        pli=mapi>>2;
+        bi=mapi&3;
+        fragi=mb->map[pli][bi];
+        frag=_enc->state.frags+fragi;
+        efrag=_enc->frinfo+fragi;
+        err[OC_MODE_INTRA][mapi]=0;
+        for(ci=1;ci<64;ci++){
+          err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
+        }
+        err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
+         OC_FRAME_PREV);
+        err[OC_MODE_INTER_MV][mapi]=oc_enc_frag_sad(_enc,frag,
+         mbinfo->mvs[0][OC_FRAME_PREV][0],mbinfo->mvs[0][OC_FRAME_PREV][1],
+         pli,OC_FRAME_PREV);
+        err[OC_MODE_INTER_MV_LAST][mapi]=oc_enc_frag_sad(_enc,frag,
+         last_mv[0][0],last_mv[0][1],pli,OC_FRAME_PREV);
+        err[OC_MODE_INTER_MV_LAST2][mapi]=oc_enc_frag_sad(_enc,frag,
+         last_mv[1][0],last_mv[1][1],pli,OC_FRAME_PREV);
+        err[OC_MODE_INTER_MV_FOUR][mapi]=oc_enc_frag_sad(_enc,frag,
+         bmvs[!!pli][bi][0],bmvs[!!pli][bi][1],pli,OC_FRAME_PREV);
+        err[OC_MODE_GOLDEN_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,
+         0,0,pli,OC_FRAME_GOLD);
+        err[OC_MODE_GOLDEN_MV][mapi]=oc_enc_frag_sad(_enc,frag,
+         mbinfo->mvs[0][OC_FRAME_GOLD][0],mbinfo->mvs[0][OC_FRAME_GOLD][1],
+         pli,OC_FRAME_GOLD);
+        /*Using these distortion values, estimate the number of bits needed to
+           code this fragment in each mode.*/
+        qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
+        mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+         OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
+        qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][0]];
+        bits[OC_MODE_INTRA]+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+         OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
+        qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
+        for(modei=OC_MODE_INTRA+1;modei<OC_NMODES;modei++){
+          bits[modei]+=OC_RES_BITRATES[qi][pli][modei][
+           OC_MINI(err[modei][mapi]>>6,15)];
+        }
+      }
+      /*Bit costs are stored in the table with extra precision.
+        Round them to the nearest whole bit here.*/
+      for(modei=0;modei<OC_NMODES;modei++){
+        bits[modei]=bits[modei]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+      }
+      /*Estimate the cost of coding the label for each mode.
+        See comments at oc_mode_scheme_chooser_cost() for a description of the
+         method.*/
+      for(modei=0;modei<OC_NMODES;modei++){
+        bits[modei]+=oc_mode_scheme_chooser_cost(&_enc->mode_scheme_chooser,
+         modei);
+      }
+      /*Add the motion vector bits for each mode that requires them.
+        Both components of the golden MV come from mvs[0], the same vector
+         used for the OC_MODE_GOLDEN_MV SAD above and stored in the fragments
+         below.*/
+      mbpmvbitsa=oc_mvbitsa(mbinfo->mvs[0][OC_FRAME_PREV][0],
+       mbinfo->mvs[0][OC_FRAME_PREV][1]);
+      mbgmvbitsa=oc_mvbitsa(mbinfo->mvs[0][OC_FRAME_GOLD][0],
+       mbinfo->mvs[0][OC_FRAME_GOLD][1]);
+      /*In 4MV mode one vector is coded per coded luma block, so the cost under
+         both schemes is summed over all of them.*/
+      mb4mvbitsa=mb4mvbitsb=0;
+      for(codedi=0;codedi<ncoded_luma;codedi++){
+        mb4mvbitsa+=oc_mvbitsa(bmvs[0][coded[codedi]][0],
+         bmvs[0][coded[codedi]][1]);
+        mb4mvbitsb+=12;
+      }
+      /*We use the same opportunity cost method of estimating the cost of
+         coding the motion vectors with the two different schemes as we do for
+         estimating the cost of the mode labels.
+        However, because there are only two schemes and they're both pretty
+         simple, this can just be done inline.*/
+      bits[OC_MODE_INTER_MV]+=OC_MINI(mvbitsa+mbpmvbitsa,mvbitsb+12)-
+       OC_MINI(mvbitsa,mvbitsb);
+      bits[OC_MODE_GOLDEN_MV]+=OC_MINI(mvbitsa+mbgmvbitsa,mvbitsb+12)-
+       OC_MINI(mvbitsa,mvbitsb);
+      bits[OC_MODE_INTER_MV_FOUR]+=OC_MINI(mvbitsa+mb4mvbitsa,
+       mvbitsb+mb4mvbitsb)-OC_MINI(mvbitsa,mvbitsb);
+      /*Finally, pick the mode with the cheapest estimated bit cost.*/
+      mb->mode=0;
+      for(modei=1;modei<OC_NMODES;modei++)if(bits[modei]<bits[mb->mode]){
+        /*Do not select 4MV mode when not all the luma blocks are coded when
+           we're in VP3 compatibility mode.*/
+        if(_enc->vp3_compatible&&modei==OC_MODE_INTER_MV_FOUR&&ncoded_luma<4){
+          continue;
+        }
+        mb->mode=modei;
+      }
+#if defined(OC_BITRATE_STATS)
+      /*Remember the error for the mode we selected in each fragment.*/
+      for(codedi=0;codedi<ncoded;codedi++){
+        mapi=coded[codedi];
+        fragi=mb->map[mapi>>2][mapi&3];
+        efrag=_enc->frinfo+fragi;
+        efrag->eerror=err[mb->mode][mapi];
+      }
+#endif
+      /*Go back and store the selected qi index corresponding to the selected
+         mode in each fragment.*/
+      for(codedi=0;codedi<ncoded;codedi++){
+        mapi=coded[codedi];
+        fragi=mb->map[mapi>>2][mapi&3];
+        frag=_enc->state.frags+fragi;
+        efrag=_enc->frinfo+fragi;
+        /*NOTE(review): mb->mode!=0 serves as the quantizer type index here,
+           which selects the INTRA entry only if OC_MODE_INTRA is 0;
+           confirm.*/
+        efrag->qii=(unsigned char)
+         frag_qii[codedi][OC_INTER_FRAME][mb->mode!=0];
+        frag->qi=_enc->qis[OC_INTER_FRAME][efrag->qii];
+      }
+      inter_bits+=bits[mb->mode];
+      intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+      oc_mode_scheme_chooser_update(&_enc->mode_scheme_chooser,mb->mode);
+      /*Charge the MV bits of the selected mode to both schemes, and keep the
+         last/second-to-last MV history up to date.*/
+      switch(mb->mode){
+        case OC_MODE_INTER_MV:{
+          mvbitsa+=mbpmvbitsa;
+          mvbitsb+=12;
+          last_mv[1][0]=last_mv[0][0];
+          last_mv[1][1]=last_mv[0][1];
+          mbmv[0]=last_mv[0][0]=mbinfo->mvs[0][OC_FRAME_PREV][0];
+          mbmv[1]=last_mv[0][1]=mbinfo->mvs[0][OC_FRAME_PREV][1];
+        }break;
+        case OC_MODE_INTER_MV_LAST:{
+          mbmv[0]=last_mv[0][0];
+          mbmv[1]=last_mv[0][1];
+        }break;
+        case OC_MODE_INTER_MV_LAST2:{
+          mbmv[0]=last_mv[1][0];
+          mbmv[1]=last_mv[1][1];
+          last_mv[1][0]=last_mv[0][0];
+          last_mv[1][1]=last_mv[0][1];
+          last_mv[0][0]=mbmv[0];
+          last_mv[0][1]=mbmv[1];
+        }break;
+        case OC_MODE_INTER_MV_FOUR:{
+          mvbitsa+=mb4mvbitsa;
+          mvbitsb+=mb4mvbitsb;
+          if(ncoded_luma>0){
+            /*After 4MV mode, the last MV is the one from the last coded luma
+               block.*/
+            last_mv[1][0]=last_mv[0][0];
+            last_mv[1][1]=last_mv[0][1];
+            last_mv[0][0]=bmvs[0][coded[ncoded_luma-1]][0];
+            last_mv[0][1]=bmvs[0][coded[ncoded_luma-1]][1];
+          }
+        }break;
+        case OC_MODE_GOLDEN_MV:{
+          mvbitsa+=mbgmvbitsa;
+          mvbitsb+=12;
+          mbmv[0]=mbinfo->mvs[0][OC_FRAME_GOLD][0];
+          mbmv[1]=mbinfo->mvs[0][OC_FRAME_GOLD][1];
+        }break;
+      }
+      if(OC_MODE_HAS_MV[mb->mode]){
+        /*Special case 4MV mode.
+          MVs are stored in bmvs.*/
+        if(mb->mode==OC_MODE_INTER_MV_FOUR){
+          for(codedi=0;codedi<ncoded;codedi++){
+            mapi=coded[codedi];
+            pli=mapi>>2;
+            bi=mapi&3;
+            fragi=mb->map[pli][bi];
+            frag=_enc->state.frags+fragi;
+            frag->mbmode=mb->mode;
+            frag->mv[0]=bmvs[!!pli][bi][0];
+            frag->mv[1]=bmvs[!!pli][bi][1];
+          }
+        }
+        /*For every other mode with a MV, it is stored in mbmv.*/
+        else{
+          for(codedi=0;codedi<ncoded;codedi++){
+            mapi=coded[codedi];
+            fragi=mb->map[mapi>>2][mapi&3];
+            frag=_enc->state.frags+fragi;
+            frag->mbmode=mb->mode;
+            frag->mv[0]=mbmv[0];
+            frag->mv[1]=mbmv[1];
+          }
+        }
+      }
+      /*For modes with no MV, ensure 0,0 is stored in each fragment.*/
+      else{
+        for(codedi=0;codedi<ncoded;codedi++){
+          mapi=coded[codedi];
+          fragi=mb->map[mapi>>2][mapi&3];
+          frag=_enc->state.frags+fragi;
+          frag->mbmode=mb->mode;
+          frag->mv[0]=frag->mv[1]=0;
+        }
+      }
+    }
+  }
+  /*Finally, compare the cost of an INTER frame and an INTRA frame.
+    Start by charging the INTER frame for the cheaper of the two MV schemes.*/
+  if(mvbitsb<mvbitsa){
+    _enc->mv_scheme=1;
+    inter_bits+=mvbitsb;
+  }
+  else{
+    _enc->mv_scheme=0;
+    inter_bits+=mvbitsa;
+  }
+  inter_bits+=_enc->mode_scheme_chooser.scheme_bits[
+   _enc->mode_scheme_chooser.scheme_list[0]];
+  /*The easiest way to count the bits needed for coded/not coded fragments is
+     to code them.
+    We need to do this anyway, might as well do it now.*/
+  oggpackB_reset(&_enc->opb_coded_flags);
+  inter_bits+=oc_enc_partial_sb_flags_pack(_enc,&_enc->opb_coded_flags);
+  inter_bits+=oc_enc_coded_sb_flags_pack(_enc,&_enc->opb_coded_flags);
+  inter_bits+=oc_enc_coded_block_flags_pack(_enc,&_enc->opb_coded_flags);
+  /*Select the quantizer list for INTER frames.*/
+  _enc->state.nqis=_enc->nqis[OC_INTER_FRAME];
+  for(qii=0;qii<_enc->state.nqis;qii++){
+    _enc->state.qis[qii]=_enc->qis[OC_INTER_FRAME][qii];
+  }
+  if(intra_bits>inter_bits){
+    _enc->vbr->est_bits=inter_bits;
+    return OC_INTER_FRAME;
+  }
+  /*All INTRA mode is smaller, but we haven't counted up the cost of all the
+     not coded fragments we will now have to code.
+    The uncoded list is stored backwards, so each plane's portion is walked
+     with a decreasing pointer.*/
+  uncoded_fragi_end=uncoded_fragi=_enc->state.uncoded_fragis;
+  for(pli=0;pli<3;pli++){
+    uncoded_fragi_end-=_enc->state.nuncoded_fragis[pli];
+    while(uncoded_fragi-->uncoded_fragi_end){
+      fragi=*uncoded_fragi;
+      frag=_enc->state.frags+fragi;
+      /*NOTE(review): Unlike the per-MB sums above, the table values are added
+         to intra_bits below without scaling them down by OC_BIT_SCALE;
+         confirm this is intended.*/
+      /*Assume a very small bit cost for invalid fragments.*/
+      if(frag->invalid)intra_bits+=OC_RES_BITRATES[0][pli][OC_MODE_INTRA][0];
+      else{
+        int eerror;
+        eerror=0;
+        efrag=_enc->frinfo+fragi;
+        for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
+#if defined(OC_BITRATE_STATS)
+        efrag->eerror=eerror;
+#endif
+        /*Use the smallest INTRA qi that is no less than this fragment's
+           minimum, falling back on the first one in the list.*/
+        qi=_enc->qis[OC_INTRA_FRAME][0];
+        for(qii=1;qii<_enc->nqis[OC_INTRA_FRAME];qii++){
+          if(_enc->qis[OC_INTRA_FRAME][qii]<qi&&
+           efrag->qi_min[0]<=_enc->qis[OC_INTRA_FRAME][qii]){
+            qi=_enc->qis[OC_INTRA_FRAME][qii];
+          }
+        }
+        intra_bits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+         OC_MINI(eerror>>8,15)];
+        /*If it turns out INTRA mode was more expensive, we're done.*/
+        if(intra_bits>inter_bits){
+          _enc->vbr->est_bits=inter_bits;
+          return OC_INTER_FRAME;
+        }
+      }
+    }
+  }
+  /*So, we've compared the full cost estimates, and INTRA is still better.
+    Code an INTRA frame instead.*/
+  oc_enc_vbr_mark_all_intra(_enc);
+  _enc->vbr->est_bits=intra_bits;
+  return OC_INTRA_FRAME;
+}
+
+/*A pipeline stage for transforming, quantizing, and tokenizing the frame.*/
+
+/*Starts a new frame in the VBR stage by zeroing the progress counter of each
+   of the three planes.
+  _stage: The VBR pipeline stage.
+  Return: 0, always.*/
+static int oc_vbr_pipe_start(oc_enc_pipe_stage *_stage){
+  _stage->y_procd[0]=0;
+  _stage->y_procd[1]=0;
+  _stage->y_procd[2]=0;
+  return 0;
+}
+
+/*Records the amount of input available in each plane.
+  No actual work is done here; the values are simply copied into the stage's
+   progress counters.
+  _stage:   The VBR pipeline stage.
+  _y_avail: The per-plane values to record.
+  Return: 0, always.*/
+static int oc_vbr_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  int plane;
+  plane=0;
+  while(plane<3){
+    _stage->y_procd[plane]=_y_avail[plane];
+    plane++;
+  }
+  return 0;
+}
+
+/*Finishes the VBR stage for the current frame.
+  This chooses the frame type, decides which fragments to code and in what
+   mode, quantizes and tokenizes them, and drives the next pipeline stage (if
+   any) through its start, process, and end callbacks.
+  _stage: The VBR pipeline stage.
+  Return: A negative value if the next stage's pipe_start or pipe_proc
+   callback returns one, the result of its pipe_end callback otherwise, or 0
+   if there is no next stage.*/
+static int oc_vbr_pipe_end(oc_enc_pipe_stage *_stage){
+  oc_enc_ctx *enc;
+  int         ret;
+  enc=_stage->enc;
+  /*Force a key frame for the very first frame, and whenever at least
+     keyframe_frequency_force frames have passed since the last key frame.*/
+  if(enc->state.curframe_num==0||
+   enc->state.curframe_num-enc->state.keyframe_num>=
+   enc->keyframe_frequency_force){
+    enc->state.frame_type=OC_INTRA_FRAME;
+    oc_enc_vbr_quant_sel_quality(enc,1);
+    oc_enc_vbr_mark_all_intra(enc);
+  }
+  else{
+    oc_enc_mark_coded(enc);
+    /*Only proceed if we have some coded blocks.
+      No coded blocks -> dropped frame -> 0 byte packet.*/
+    if(enc->ncoded_frags>0){
+      oc_enc_vbr_quant_sel_quality(enc,0);
+      /*Mode decision may still decide that a key frame is cheaper.*/
+      enc->state.frame_type=oc_enc_choose_mbmodes(enc);
+      if(enc->state.frame_type==OC_INTER_FRAME)oc_enc_do_inter_dcts(enc);
+    }
+  }
+  /*Only initialize subsequent stages after we know how many fragments will be
+     encoded, and at what quality (so the loop filter can be set up
+     properly).*/
+  if(_stage->next!=NULL){
+    ret=(*_stage->next->pipe_start)(_stage->next);
+    if(ret<0)return ret;
+  }
+  if(enc->ncoded_frags>0){
+    /*TODO: These stages could be pipelined with reconstruction.*/
+    oc_enc_vbr_quant_dc(enc);
+    oc_enc_vbr_residual_tokenize(enc);
+  }
+  /*Everything is done in one go, so the next stage is handed all of our
+     progress at once and then finished immediately.*/
+  if(_stage->next!=NULL){
+    ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+    if(ret<0)return ret;
+    return (*_stage->next->pipe_end)(_stage->next);
+  }
+  return 0;
+}
+
+/*Sets up the pipeline stage that performs the transform, quantization, and
+   tokenization.
+  The stage is left without a successor; that link is made when the pipeline
+   is assembled.
+  _stage: The stage to initialize.
+  _enc:   The encoding context.*/
+static void oc_vbr_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  /*Install the three stage callbacks.*/
+  _stage->pipe_start=oc_vbr_pipe_start;
+  _stage->pipe_proc=oc_vbr_pipe_process;
+  _stage->pipe_end=oc_vbr_pipe_end;
+  /*Attach the encoder and start out unlinked.*/
+  _stage->next=NULL;
+  _stage->enc=_enc;
+}
+
+
+/*Initializes a VBR context with its default configuration.
+  The target qi is taken from the quality in the encoder's stream info, and
+   both pairs of qi bounds span the full range, 0 to 63.
+  _vbr: The VBR context to initialize.
+  _enc: The encoding context it belongs to.
+  Return: 0, always.*/
+static int oc_enc_vbr_init(oc_enc_vbr_ctx *_vbr,oc_enc_ctx *_enc){
+  _vbr->enc=_enc;
+  /*Default configuration.*/
+  _vbr->cfg.qi=_enc->state.info.quality;
+  _vbr->cfg.df_qi_min=0;
+  _vbr->cfg.kf_qi_min=0;
+  _vbr->cfg.df_qi_max=63;
+  _vbr->cfg.kf_qi_max=63;
+  /*Sub-contexts for the analysis stages, followed by our own stage.*/
+  _vbr->impmap=oc_impmap_alloc(_enc);
+  _vbr->psych=oc_psych_alloc(_enc);
+  oc_vbr_pipe_init(&_vbr->pipe,_enc);
+  return 0;
+}
+
+/*Releases the sub-contexts owned by a VBR context.
+  The context structure itself is not freed.
+  _vbr: The VBR context to clear.*/
+static void oc_enc_vbr_clear(oc_enc_vbr_ctx *_vbr){
+  oc_psych_free(_vbr->psych);
+  oc_impmap_free(_vbr->impmap);
+}
+
+/*Validates a VBR configuration and, if it is acceptable, installs it.
+  _vbr: The VBR context.
+  _cfg: The configuration to install.
+  Return: 0 on success, or -OC_EINVAL if any qi value lies outside of 0...63
+   or a maximum is below its minimum, in which case the current configuration
+   is left untouched.*/
+static int oc_enc_vbr_cfg(oc_enc_vbr_ctx *_vbr,theora_vbr_cfg *_cfg){
+  /*The target qi.*/
+  if(_cfg->qi<0||_cfg->qi>63)return -OC_EINVAL;
+  /*The kf_* bounds: 0<=kf_qi_min<=kf_qi_max<=63.*/
+  if(_cfg->kf_qi_min<0||_cfg->kf_qi_min>63)return -OC_EINVAL;
+  if(_cfg->kf_qi_max<_cfg->kf_qi_min||_cfg->kf_qi_max>63)return -OC_EINVAL;
+  /*The df_* bounds: 0<=df_qi_min<=df_qi_max<=63.*/
+  if(_cfg->df_qi_min<0||_cfg->df_qi_min>63)return -OC_EINVAL;
+  if(_cfg->df_qi_max<_cfg->df_qi_min||_cfg->df_qi_max>63)return -OC_EINVAL;
+  memcpy(&_vbr->cfg,_cfg,sizeof(_vbr->cfg));
+  return 0;
+}
+
+/*Wires up the encoder pipeline for VBR mode.
+  The encoder's fill stage is linked to its pack stage, and the VBR stage is
+   linked to the encoder's copy stage.
+  The psychovisual, fDCT, importance map, and motion search stages are then
+   put in front of the VBR stage, in that order from nearest to farthest.
+  NOTE(review): This assumes each *_prepend_to_pipe() call returns a stage
+   that runs before the one passed in; confirm against their definitions.
+  _vbr: The VBR context.
+  Return: The stage returned by the last prepend call, which the caller
+   installs as the encoder's pipeline.*/
+static oc_enc_pipe_stage *oc_enc_vbr_create_pipe(oc_enc_vbr_ctx *_vbr){
+  oc_enc_pipe_stage *pipe;
+  _vbr->enc->fill_pipe.next=&_vbr->enc->pack_pipe;
+  _vbr->pipe.next=&_vbr->enc->copy_pipe;
+  /*TODO: Disable spatial masking and CSF filtering based on
+     application-specified speed level.*/
+  pipe=oc_psych_prepend_to_pipe(_vbr->psych,&_vbr->pipe);
+  /*The fDCT stage feeds whatever the psychovisual model put in front of the
+     VBR stage.*/
+  _vbr->enc->fdct_pipe.next=pipe;
+  /*TODO: Disable impmap based on application-specified speed level.*/
+  pipe=oc_impmap_prepend_to_pipe(_vbr->impmap,&_vbr->enc->fdct_pipe);
+  pipe=oc_mcenc_prepend_to_pipe(_vbr->enc->mcenc,pipe);
+  return pipe;
+}
+
+
+/*Allocates and initializes a VBR encoding context.
+  _enc: The encoding context to attach it to.
+  Return: The new context, or NULL if the allocation failed.
+          Release it with oc_enc_vbr_free().*/
+oc_enc_vbr_ctx *oc_enc_vbr_alloc(oc_enc_ctx *_enc){
+  oc_enc_vbr_ctx *vbr;
+  vbr=(oc_enc_vbr_ctx *)_ogg_malloc(sizeof(*vbr));
+  /*Initialization writes through the pointer, so bail out if we ran out of
+     memory.*/
+  if(vbr==NULL)return NULL;
+  oc_enc_vbr_init(vbr,_enc);
+  return vbr;
+}
+
+/*Destroys a VBR encoding context, along with everything it owns.
+  _vbr: The context to free.
+        This may be NULL, in which case nothing is done.*/
+void oc_enc_vbr_free(oc_enc_vbr_ctx *_vbr){
+  if(_vbr==NULL)return;
+  oc_enc_vbr_clear(_vbr);
+  _ogg_free(_vbr);
+}
+
+/*Switches the encoder over to VBR mode.
+  _vbr: The VBR context.
+  _cfg: The VBR configuration to use, or NULL to keep the current one.
+  Return: 0 on success, or a negative value if _cfg was rejected, in which
+   case the encoder is left as it was.*/
+int oc_enc_vbr_enable(oc_enc_vbr_ctx *_vbr,theora_vbr_cfg *_cfg){
+  oc_enc_ctx *enc;
+  if(_cfg!=NULL){
+    int err;
+    err=oc_enc_vbr_cfg(_vbr,_cfg);
+    if(err<0)return err;
+  }
+  /*Map the qi to a multiple of JND values.
+    The top of the qi range gets a fixed scale instead of a point on the
+     exponential curve.*/
+  if(_vbr->cfg.qi>=63)_vbr->qscale=0.5F;
+  else _vbr->qscale=1.5F*OC_POWF(2,0.0625F*(64-_vbr->cfg.qi));
+  enc=_vbr->enc;
+  enc->pipe=oc_enc_vbr_create_pipe(_vbr);
+  /*TODO: Implement a real speed level.*/
+  enc->speed_max=0;
+  enc->set_speed=oc_enc_set_speed_null;
+  return 0;
+}

Added: experimental/derf/theora-exp/lib/encvbr.h
===================================================================
--- experimental/derf/theora-exp/lib/encvbr.h	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/encvbr.h	2005-09-18 00:58:06 UTC (rev 10030)
@@ -0,0 +1,43 @@
+#if !defined(_encvbr_H)
+# define _encvbr_H (1)
+# include "encint.h"
+
+
+
+typedef struct oc_impmap_ctx oc_impmap_ctx;
+typedef struct oc_psych_ctx  oc_psych_ctx;
+
+
+
+/*Context information for the VBR encoder.*/
+struct oc_enc_vbr_ctx{
+  /*Configuration information.*/
+  theora_vbr_cfg     cfg;
+  /*The main VBR encoder's pipe stage.*/
+  oc_enc_pipe_stage  pipe;
+  /*The scale factor for the current quality setting.*/
+  float              qscale;
+  /*Minimum psychovisual tolerance for the DC coefficients in each plane.*/
+  unsigned           dc_tol_mins[3];
+  /*The estimated bit cost of the current frame.*/
+  int                est_bits;
+  /*The encode context.*/
+  oc_enc_ctx        *enc;
+  /*Context information used to generate the importance map.*/
+  oc_impmap_ctx     *impmap;
+  /*Context information used to generate low-level perceptual weightings.*/
+  oc_psych_ctx      *psych;
+};
+
+
+oc_impmap_ctx *oc_impmap_alloc(oc_enc_ctx *_enc);
+void oc_impmap_free(oc_impmap_ctx *_impmap);
+oc_enc_pipe_stage *oc_impmap_prepend_to_pipe(oc_impmap_ctx *_impmap,
+ oc_enc_pipe_stage *_next);
+
+oc_psych_ctx *oc_psych_alloc(oc_enc_ctx *_enc);
+void oc_psych_free(oc_psych_ctx *_psych);
+oc_enc_pipe_stage *oc_psych_prepend_to_pipe(oc_psych_ctx *_psych,
+ oc_enc_pipe_stage *_next);
+
+#endif

Modified: experimental/derf/theora-exp/lib/fdct.c
===================================================================
--- experimental/derf/theora-exp/lib/fdct.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/fdct.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -316,3 +316,131 @@
   if(cext==NULL)for(in=w,out=_y,end=out+8;out<end;in+=8,out++)fdct8(out,in);
   else for(in=w,out=_y,end=out+8;out<end;in+=8,out++)fdct8_ext(out,in,cext);
 }
+
+/*Performs an fDCT on a given fragment.
+  _frag:     The fragment to perform the 2D DCT on.
+  _dct_vals: The output buffer for the DCT coefficients.
+  _ystride:  The Y stride of the plane the fragment belongs to.
+  _framei:   The picture buffer index to perform the DCT on.
+             Use OC_FRAME_IO for the current input frame.*/
+void oc_frag_intra_fdct(const oc_fragment *_frag,ogg_int16_t _dct_vals[64],
+ int _ystride,int _framei){
+  ogg_int16_t    pix_buf[64];
+  unsigned char *pixels;
+  int            pixi;
+  int            y;
+  int            x;
+  /*NOTE: 128 is subtracted from each pixel value to make it signed.
+    The original VP3 source claimed that, "this reduces the internal precision
+     requirments [sic] in the DCT transform."
+    This is of course not actually true.
+    The transform must still support input in the range [-255,255] to code
+     predicted fragments, since the same transform is used for both.
+    This actually _reduces_ the precision of the results, because larger
+     (absolute) values would have fewer significant bits chopped off when
+     rounding.
+    We're stuck with it, however.
+    At least it might reduce bias towards 0 when coding unpredicted DC
+     coefficients, but that's not what VP3 justified it with.*/
+  pixels=_frag->buffer[_framei];
+  /*For border fragments, only copy pixels that are in the displayable
+     region of the image.
+    The DCT function will compute optimal padding values for the other
+     pixels.*/
+  if(_frag->border!=NULL){
+    ogg_int64_t mask;
+    mask=_frag->border->mask;
+    for(pixi=y=0;y<8;y++){
+      for(x=0;x<8;x++,pixi++){
+        pix_buf[pixi]=(ogg_int16_t)(((int)mask&1)?pixels[x]-128:0);
+        /*This branchless code is (almost) equivalent to the previous line:
+            int pmask;
+            pmask=-((int)mask&1);
+            pix_buf[pixi]=(ogg_int16_t)(pmask&(pixels[x]-128));
+          We don't use this code to allow the user to pass in a buffer that is
+           the exact size of the displayed image, not the size padded to a
+           multiple of 16.
+          With an exact-size buffer, we might segfault on pixels[x] if it is
+           not mapped to a valid page, even though we would discard the value
+           we were attempting to read.*/
+        mask>>=1;
+      }
+      pixels+=_ystride;
+    }
+    oc_fdct8x8_border(_frag->border,_dct_vals,pix_buf);
+  }
+  /*Otherwise, copy all the pixels in the fragment and do a normal DCT.*/
+  else{
+    for(pixi=y=0;y<8;y++){
+      for(x=0;x<8;x++,pixi++)pix_buf[pixi]=(ogg_int16_t)(pixels[x]-128);
+      pixels+=_ystride;
+    }
+    oc_fdct8x8(_dct_vals,pix_buf);
+  }
+}
+
+/*A pipeline stage for applying an fDCT to each (non-motion compensated) block
+   in a frame.*/
+
+static int oc_fdct_pipe_start(oc_enc_pipe_stage *_stage){
+  int pli;
+  for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+  return _stage->next!=NULL?(*_stage->next->pipe_start)(_stage->next):0;
+}
+
+/*Does the fDCTs.
+  This pipeline stage proceeds in a planar fashion.*/
+static int oc_fdct_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  int pli;
+  for(pli=0;pli<3;pli++){
+    int y_procd;
+    int y_avail;
+    /*Compute how far we can get in complete fragment rows.*/
+    y_procd=_stage->y_procd[pli];
+    y_avail=_y_avail[pli]&~7;
+    /*If that's farther than we've already gotten, do some fDCTs.*/
+    if(y_avail>y_procd){
+      oc_fragment_plane    *fplane;
+      oc_fragment          *frags;
+      oc_fragment          *frag_end;
+      oc_fragment_enc_info *efrags;
+      int                   ystride;
+      int                   yfrag0;
+      int                   yrows;
+      fplane=_stage->enc->state.fplanes+pli;
+      ystride=_stage->enc->state.input[pli].ystride;
+      yfrag0=fplane->froffset+(y_procd>>3)*fplane->nhfrags;
+      yrows=y_avail-y_procd>>3;
+      frags=_stage->enc->state.frags+yfrag0;
+      efrags=_stage->enc->frinfo+yfrag0;
+      do{
+        for(frag_end=frags+fplane->nhfrags;frags<frag_end;frags++,efrags++){
+          oc_frag_intra_fdct(frags,efrags->dct_coeffs,ystride,OC_FRAME_IO);
+        }
+        _stage->y_procd[pli]+=8;
+        if(_stage->next!=NULL){
+          int ret;
+          ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+          if(ret<0)return ret;
+        }
+      }
+      while(--yrows);
+    }
+  }
+  return 0;
+}
+
+static int oc_fdct_pipe_end(oc_enc_pipe_stage *_stage){
+  return _stage->next!=NULL?(*_stage->next->pipe_end)(_stage->next):0;
+}
+
+
+/*Initialize _stage as the fDCT stage of the pipeline.
+  _enc: The encoding context.*/
+void oc_fdct_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->enc=_enc;
+  _stage->next=NULL;
+  _stage->pipe_start=oc_fdct_pipe_start;
+  _stage->pipe_proc=oc_fdct_pipe_process;
+  _stage->pipe_end=oc_fdct_pipe_end;
+}

Modified: experimental/derf/theora-exp/lib/fdct.h
===================================================================
--- experimental/derf/theora-exp/lib/fdct.h	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/fdct.h	2005-09-18 00:58:06 UTC (rev 10030)
@@ -6,5 +6,8 @@
 void oc_fdct8x8(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
 void oc_fdct8x8_border(const oc_border_info *_border,ogg_int16_t _y[64],
  ogg_int16_t _x[64]);
+void oc_frag_intra_fdct(const oc_fragment *_frag,ogg_int16_t _dct_vals[64],
+ int _ystride,int _framei);
+void oc_fdct_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc);
 
 #endif

Modified: experimental/derf/theora-exp/lib/huffdec.c
===================================================================
--- experimental/derf/theora-exp/lib/huffdec.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/huffdec.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -84,7 +84,7 @@
   if(!bits){
     int ret;
     binode=oc_huff_node_alloc(1);
-    binode->depth=_depth>1;
+    binode->depth=(unsigned char)(_depth>1);
     ret=oc_huff_tree_unpack(_opb,binode->nodes,_depth);
     if(ret>=0)ret=oc_huff_tree_unpack(_opb,binode->nodes+1,_depth);
     if(ret<0){
@@ -96,7 +96,7 @@
   else{
     if(theora_read(_opb,OC_NDCT_TOKEN_BITS,&bits)<0)return OC_BADHEADER;
     binode=oc_huff_node_alloc(0);
-    binode->depth=_depth>1;
+    binode->depth=(unsigned char)(_depth>1);
     binode->token=(unsigned char)bits;
   }
   *_binode=binode;
@@ -112,7 +112,6 @@
 static int oc_huff_tree_mindepth(oc_huff_node *_binode){
   int depth0;
   int depth1;
-  int cdepth;
   if(_binode->nbits==0)return 0;
   depth0=oc_huff_tree_mindepth(_binode->nodes[0]);
   depth1=oc_huff_tree_mindepth(_binode->nodes[1]);
@@ -127,9 +126,6 @@
   Return: The number of entries that would be contained in a jump table of the
            given depth.*/
 static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
-  int depth0;
-  int depth1;
-  int cdepth;
   if(_binode->nbits==0||_depth<=0)return 1;
   else{
     return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+
@@ -156,7 +152,7 @@
  oc_huff_node *_binode,int _level,int _depth){
   if(_level<=0||_binode->nbits==0){
     int i;
-    _binode->depth=_depth-_level;
+    _binode->depth=(unsigned char)(_depth-_level);
     _nodes[0]=oc_huff_tree_collapse(_binode);
     for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
   }
@@ -176,13 +172,10 @@
   Return: The new root of the collapsed sub-tree.*/
 static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode){
   oc_huff_node *root;
-  int           nchildren;
   int           mindepth;
   int           depth;
   int           loccupancy;
   int           occupancy;
-  int           i;
-  int           inext;
   depth=mindepth=oc_huff_tree_mindepth(_binode);
   occupancy=1<<mindepth;
   do{

Modified: experimental/derf/theora-exp/lib/impmap.c
===================================================================
--- experimental/derf/theora-exp/lib/impmap.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/impmap.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -343,6 +343,8 @@
 
 /*Importance map context information.*/
 struct oc_impmap_ctx{
+  /*The pipeline stage.*/
+  oc_enc_pipe_stage                  pipe;
   /*Segmentation information for the Y plane.
     Some day we may also wish to segment the chroma planes, or derive a
      segmentation using all 3 planes at once.*/
@@ -2106,42 +2108,7 @@
   }
   return ret;
 }
-
-oc_impmap_ctx *oc_impmap_alloc(oc_enc_ctx *_enc){
-  theora_info   *info;
-  oc_impmap_ctx *impmap;
-  int            edge_sz;
-  int            width;
-  int            height;
-  info=&_enc->state.info;
-  width=info->pic_width;
-  height=info->pic_height;
-  impmap=(oc_impmap_ctx *)_ogg_malloc(sizeof(*impmap));
-  oc_seg_init(&impmap->seg,width,height);
-  impmap->inv_region_sz_max=100.0F/(width*height);
-  edge_sz=width>1?height>1?(width-2<<1)+(height<<1):width:height;
-  impmap->inv_edge_sz_max=2.0F/edge_sz;
-  impmap->pic_x=info->pic_x;
-  impmap->pic_y=info->pic_y;
-  impmap->imp_avg=0.5F;
-  /*Allocate space for the region stats and neighbor links.*/
-  impmap->regions=(oc_impmap_region *)_ogg_malloc(
-   impmap->seg.cregions*sizeof(impmap->regions[0]));
-  impmap->enc=_enc;
-  impmap->chroma_frag_weight=
-   OC_IMPMAP_CHROMA_FRAG_WEIGHT_TABLE[_enc->state.info.pixel_fmt];
-  return impmap;
-}
-
-void oc_impmap_free(oc_impmap_ctx *_impmap){
-  if(_impmap!=NULL){
-    oc_seg_clear(&_impmap->seg);
-    _ogg_free(_impmap->regions);
-    _ogg_free(_impmap);
-  }
-}
-
-void oc_impmap_fill(oc_impmap_ctx *_impmap,float _duration){
+static void oc_impmap_fill(oc_impmap_ctx *_impmap,float _duration){
   theora_img_plane yplane;
   float            imp_sum;
   int              img_offset;
@@ -2229,3 +2196,92 @@
   }
 #endif
 }
+
+
+/*The importance map pipeline stage.
+  For now, for simplicity, this is not actually pipelined.
+  The quadtree segmentation algorithm does not really lend itself to it, and
+   even if an online segmentation algorithm were used, a full stall would be
+   created by the need to gather statistics over all the regions to assign
+   weights to any of them.*/
+
+static int oc_impmap_pipe_start(oc_enc_pipe_stage *_stage){
+  int pli;
+  for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+  return 0;
+}
+
+static int oc_impmap_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  int pli;
+  for(pli=0;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
+  return 0;
+}
+
+static int oc_impmap_pipe_end(oc_enc_pipe_stage *_stage){
+  oc_enc_ctx *enc;
+  enc=_stage->enc;
+  oc_impmap_fill(enc->vbr->impmap,
+   enc->state.info.fps_denominator/(float)enc->state.info.fps_numerator);
+  if(_stage->next!=NULL){
+    int ret;
+    ret=(*_stage->next->pipe_start)(_stage->next);
+    if(ret<0)return ret;
+    ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+    if(ret<0)return ret;
+    return (*_stage->next->pipe_end)(_stage->next);
+  }
+  return 0;
+}
+
+/*Initialize _stage as the importance map stage of the pipeline.
+  _enc: The encoding context.*/
+static void oc_impmap_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->enc=_enc;
+  _stage->next=NULL;
+  _stage->pipe_start=oc_impmap_pipe_start;
+  _stage->pipe_proc=oc_impmap_pipe_process;
+  _stage->pipe_end=oc_impmap_pipe_end;
+}
+
+
+oc_impmap_ctx *oc_impmap_alloc(oc_enc_ctx *_enc){
+  theora_info   *info;
+  oc_impmap_ctx *impmap;
+  int            edge_sz;
+  int            width;
+  int            height;
+  info=&_enc->state.info;
+  width=info->pic_width;
+  height=info->pic_height;
+  impmap=(oc_impmap_ctx *)_ogg_malloc(sizeof(*impmap));
+  oc_seg_init(&impmap->seg,width,height);
+  impmap->inv_region_sz_max=100.0F/(width*height);
+  edge_sz=width>1?height>1?(width-2<<1)+(height<<1):width:height;
+  impmap->inv_edge_sz_max=2.0F/edge_sz;
+  impmap->pic_x=info->pic_x;
+  impmap->pic_y=info->pic_y;
+  impmap->imp_avg=0.5F;
+  /*Allocate space for the region stats and neighbor links.*/
+  impmap->regions=(oc_impmap_region *)_ogg_malloc(
+   impmap->seg.cregions*sizeof(impmap->regions[0]));
+  impmap->enc=_enc;
+  impmap->chroma_frag_weight=
+   OC_IMPMAP_CHROMA_FRAG_WEIGHT_TABLE[_enc->state.info.pixel_fmt];
+  oc_impmap_pipe_init(&impmap->pipe,_enc);
+  return impmap;
+}
+
+void oc_impmap_free(oc_impmap_ctx *_impmap){
+  if(_impmap!=NULL){
+    oc_seg_clear(&_impmap->seg);
+    _ogg_free(_impmap->regions);
+    _ogg_free(_impmap);
+  }
+}
+
+
+oc_enc_pipe_stage *oc_impmap_prepend_to_pipe(oc_impmap_ctx *_impmap,
+ oc_enc_pipe_stage *_next){
+  _impmap->pipe.next=_next;
+  return &_impmap->pipe;
+}

Modified: experimental/derf/theora-exp/lib/mcenc.c
===================================================================
--- experimental/derf/theora-exp/lib/mcenc.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/mcenc.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -4,12 +4,13 @@
 #include "encint.h"
 
 struct oc_mcenc_ctx{
-  oc_enc_ctx *enc;
-  int         candidates[12][2];
-  int         setb0;
-  int         ncandidates;
-  ogg_int32_t mvapw1;
-  ogg_int32_t mvapw2;
+  oc_enc_ctx        *enc;
+  oc_enc_pipe_stage  pipe;
+  int                candidates[12][2];
+  int                setb0;
+  int                ncandidates;
+  ogg_int32_t        mvapw1[2];
+  ogg_int32_t        mvapw2[2];
 };
 
 /*The maximum Y plane SAD value for accepting the median predictor.*/
@@ -68,6 +69,8 @@
  int _which_frame){
   oc_mb_enc_info *nemb;
   oc_mb_enc_info *emb;
+  ogg_int32_t     mvapw1;
+  ogg_int32_t     mvapw2;
   int             a[3][2];
   int             ncandidates;
   int             i;
@@ -116,15 +119,17 @@
   /*Fill in set B: accelerated predictors for this and adjacent macro
      blocks.*/
   _mcenc->setb0=ncandidates;
+  mvapw1=_mcenc->mvapw1[_which_frame];
+  mvapw2=_mcenc->mvapw2[_which_frame];
   /*The first time through the loop use the current macro block.*/
   nemb=emb;
   for(i=0;;i++){
     _mcenc->candidates[ncandidates][0]=
-     OC_DIV_ROUND_POW2(nemb->mvs[1][_which_frame][0]*_mcenc->mvapw1-
-     nemb->mvs[2][_which_frame][0]*_mcenc->mvapw2,16,0x8000);
+     OC_DIV_ROUND_POW2(nemb->mvs[1][_which_frame][0]*mvapw1-
+     nemb->mvs[2][_which_frame][0]*mvapw2,16,0x8000);
     _mcenc->candidates[ncandidates][1]=
-     OC_DIV_ROUND_POW2(nemb->mvs[1][_which_frame][1]*_mcenc->mvapw1-
-     nemb->mvs[2][_which_frame][1]*_mcenc->mvapw2,16,0x8000);
+     OC_DIV_ROUND_POW2(nemb->mvs[1][_which_frame][1]*mvapw1-
+     nemb->mvs[2][_which_frame][1]*mvapw2,16,0x8000);
     _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
      _mcenc->candidates[ncandidates][0],31);
     _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
@@ -327,17 +332,6 @@
   return _best_err;
 }
 
-oc_mcenc_ctx *oc_mcenc_alloc(oc_enc_ctx *_enc){
-  oc_mcenc_ctx *mcenc;
-  mcenc=_ogg_calloc(1,sizeof(*mcenc));
-  mcenc->enc=_enc;
-  return mcenc;
-}
-
-void oc_mcenc_free(oc_mcenc_ctx *_mcenc){
-  _ogg_free(_mcenc);
-}
-
 /*Perform a motion vector search for this macro block against a single
    reference frame.
   As a bonus, individual block motion vectors are computed as well, as much of
@@ -351,7 +345,7 @@
   _error:    Returns the prediction error for the macro block motion vector.
   _error4mv: Returns sum of the prediction error for the individual block
               motion vectors.*/
-void oc_mcenc_search(oc_mcenc_ctx *_mcenc,int _mbi,int _frame,
+static void oc_mcenc_search(oc_mcenc_ctx *_mcenc,int _mbi,int _frame,
  char _bmvs[4][2],int *_error,int *_error4mv){
   oc_mb_enc_info *embs;
   oc_mb_enc_info *emb;
@@ -562,7 +556,8 @@
    reference frame.
   The actual motion vector is stored in the appropriate place in the
    oc_mb_enc_info structure.
-  Block-level motion vectors are not computed.
+  This is like the above oc_mcenc_search() routine, except that block-level
+   motion vectors are not computed.
   _mcenc:    The motion compensation context.
   _mbi:      The macro block index.
   _frame:    The frame to search, either OC_FRAME_PREV or OC_FRAME_GOLD.
@@ -682,29 +677,109 @@
   return best_err;
 }
 
-void oc_mcenc_analyze(oc_mcenc_ctx *_mcenc){
-  oc_mb_enc_info *embs;
-  oc_mb          *mbs;
-  ogg_int64_t     nframes;
-  int             nmbs;
-  int             mbi;
-  /*If there is no previous frame, then skip motion analysis: Every vector has
-     been initialized to (0,0).*/
-  if(_mcenc->enc->state.ref_frame_idx[OC_FRAME_PREV]<0)return;
-  /*Set up the accelerated MV weights for previous frame prediction.*/
-  _mcenc->mvapw1=(ogg_int32_t)1<<17;
-  _mcenc->mvapw2=(ogg_int32_t)1<<16;
-  mbs=_mcenc->enc->state.mbs;
-  embs=_mcenc->enc->mbinfo;
-  nmbs=_mcenc->enc->state.fplanes[0].nsbs<<2;
-  for(mbi=0;mbi<nmbs;mbi++)if(mbs[mbi].mode!=OC_MODE_INVALID){
+/*A pipe to perform a motion vector search for each macro block.*/
+
+static int oc_mcenc_pipe_start(oc_enc_pipe_stage *_stage){
+  oc_enc_ctx  *enc;
+  ogg_int64_t  nframes;
+  int          pli;
+  int          mbi;
+  for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+  /*Move the motion vector predictors back a frame.
+    We could pipeline this, too, but it's probably not worth it.*/
+  enc=_stage->enc;
+  for(mbi=enc->state.fplanes[0].nsbs<<2;mbi-->0;){
     oc_mb_enc_info *emb;
-    emb=embs+mbi;
-    oc_mcenc_search(_mcenc,mbi,OC_FRAME_PREV,emb->bmvs,&emb->aerror,
-     &emb->aerror4mv);
+    emb=enc->mbinfo+mbi;
+    memmove(emb->mvs+1,emb->mvs,2*sizeof(emb->mvs[0]));
   }
+  /*Set up the accelerated MV weights for previous frame prediction.*/
+  enc->mcenc->mvapw1[OC_FRAME_PREV]=(ogg_int32_t)1<<17;
+  enc->mcenc->mvapw2[OC_FRAME_PREV]=(ogg_int32_t)1<<16;
   /*Set up the accelerated MV weights for golden frame prediction.*/
-  nframes=_mcenc->enc->state.curframe_num-_mcenc->enc->state.keyframe_num;
-  _mcenc->mvapw1=(ogg_int32_t)(nframes!=1?(nframes<<17)/(nframes-1):0);
-  _mcenc->mvapw2=(ogg_int32_t)(nframes!=2?(nframes<<16)/(nframes-2):0);
+  nframes=enc->state.curframe_num-enc->state.keyframe_num;
+  enc->mcenc->mvapw1[OC_FRAME_GOLD]=(ogg_int32_t)(
+   nframes!=1?(nframes<<17)/(nframes-1):0);
+  enc->mcenc->mvapw2[OC_FRAME_GOLD]=(ogg_int32_t)(
+   nframes!=2?(nframes<<16)/(nframes-2):0);
+  return _stage->next!=NULL?(*_stage->next->pipe_start)(_stage->next):0;
 }
+
+static int oc_mcenc_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  oc_mcenc_ctx   *mcenc;
+  int             pli;
+  mcenc=_stage->enc->mcenc;
+  /*For now we ignore the chroma planes.*/
+  for(pli=1;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
+  /*Only do motion analysis if there is a previous frame; otherwise every
+     vector has already been initialized to (0,0).*/
+  if(mcenc->enc->state.ref_frame_idx[OC_FRAME_PREV]>=0){
+    int             y_avail;
+    y_avail=_y_avail[0];
+    /*Round to a super-block row, except for the last one, which may be
+       incomplete.*/
+    if(y_avail<(int)mcenc->enc->state.info.frame_height)y_avail&=~31;
+    while(_stage->y_procd[0]<y_avail){
+      oc_mb_enc_info *embs;
+      oc_mb          *mbs;
+      int             mbi;
+      int             mbi_end;
+      mbi=(_stage->y_procd[0]>>4)*mcenc->enc->state.fplanes[0].nhsbs;
+      mbi_end=mbi+mcenc->enc->state.fplanes[0].nhsbs<<1;
+      mbs=mcenc->enc->state.mbs;
+      embs=mcenc->enc->mbinfo;
+      for(;mbi<mbi_end;mbi++)if(mbs[mbi].mode!=OC_MODE_INVALID){
+        oc_mb_enc_info *emb;
+        emb=embs+mbi;
+        oc_mcenc_search(mcenc,mbi,OC_FRAME_PREV,emb->bmvs,&emb->aerror,
+         &emb->aerror4mv);
+      }
+      /*Chain to the next stage.*/
+      _stage->y_procd[0]=OC_MINI(_stage->y_procd[0]+32,y_avail);
+      if(_stage->next!=NULL){
+        int ret;
+        ret=_stage->next->pipe_proc(_stage->next,_stage->y_procd);
+        if(ret<0)return ret;
+      }
+    }
+  }
+  else{
+    _stage->y_procd[0]=_y_avail[0];
+    if(_stage->next!=NULL){
+      return _stage->next->pipe_proc(_stage->next,_stage->y_procd);
+    }
+  }
+  return 0;
+}
+
+static int oc_mcenc_pipe_end(oc_enc_pipe_stage *_stage){
+  return _stage->next!=NULL?(*_stage->next->pipe_end)(_stage->next):0;
+}
+
+/*Initialize _stage as the motion vector search stage of the pipeline.
+  _enc: The encoding context.*/
+static void oc_mcenc_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->enc=_enc;
+  _stage->next=NULL;
+  _stage->pipe_start=oc_mcenc_pipe_start;
+  _stage->pipe_proc=oc_mcenc_pipe_process;
+  _stage->pipe_end=oc_mcenc_pipe_end;
+}
+
+oc_mcenc_ctx *oc_mcenc_alloc(oc_enc_ctx *_enc){
+  oc_mcenc_ctx *mcenc;
+  mcenc=_ogg_calloc(1,sizeof(*mcenc));
+  mcenc->enc=_enc;
+  oc_mcenc_pipe_init(&mcenc->pipe,_enc);
+  return mcenc;
+}
+
+void oc_mcenc_free(oc_mcenc_ctx *_mcenc){
+  _ogg_free(_mcenc);
+}
+
+oc_enc_pipe_stage *oc_mcenc_prepend_to_pipe(oc_mcenc_ctx *_mcenc,
+ oc_enc_pipe_stage *_next){
+  _mcenc->pipe.next=_next;
+  return &_mcenc->pipe;
+}

Modified: experimental/derf/theora-exp/lib/ocintrin.h
===================================================================
--- experimental/derf/theora-exp/lib/ocintrin.h	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/ocintrin.h	2005-09-18 00:58:06 UTC (rev 10030)
@@ -21,6 +21,12 @@
 
 #define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
 #define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
+/*Clamps an integer into the given range.
+  If _a>_c, then the lower bound _a is respected over the upper bound _c (this
+   behavior is required to meet our documented API behavior).
+  _a: The lower bound.
+  _b: The value to clamp.
+  _c: The upper bound.*/
 #define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
 #define OC_CLAMP255(_x)     (oc_clamp255(_x))
 /*Divides an integer by a power of two, truncating towards 0.

Modified: experimental/derf/theora-exp/lib/psych.c
===================================================================
--- experimental/derf/theora-exp/lib/psych.c	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/psych.c	2005-09-18 00:58:06 UTC (rev 10030)
@@ -1,7 +1,7 @@
 #include <float.h>
 #include <math.h>
 #include <string.h>
-#include "encint.h"
+#include "psych.h"
 
 /*This is where we attempt to model low-level vision processes, such as
    sensitivities due to the way human eyes are constructed and how data flows
@@ -1699,7 +1699,7 @@
    value might be.
   In Part II, Annex E, Section 2, it discusses the "point-wise extended
    non-linearity" on which Nadenau's IaCLA-2 method is based.
-  There it suggests using a parameter a which would case k_L to have an
+  There it suggests using a parameter a which would cause k_L to have an
    equivalent value of 2^{component_bit_depth-1}*1E-4.
   Nadenau reports, however, that the standard specifies a value of 1E-4
    directly.
@@ -1737,8 +1737,8 @@
    a wavelet decomposition on the diagonal bands, which can lead to excessive
    horizontal and vertical masking around diagonal edges and decreased
    diagonal masking.
-  The DCT has smaller frequency bands, than wavelets, and so we can get a
-   slight improvement by moving some coefficients around in the high frequency
+  The DCT has smaller frequency bands than wavelets, and so we can get a slight
+   improvement by moving some coefficients around in the high frequency
    channels.
 
   Each coefficient uses a combination of nearby coefficients in the same DCT
@@ -1841,6 +1841,7 @@
 /*This parameter, r_{csf}^\nu, compensates for the dyanmic range of the DCT
    coefficients.*/
 #define OC_MASK_R_CSF_NU      (8.8388347648318440550E-2F)
+/*#define OC_MASK_R_CSF_NU      (0.37892914162759952059F)*/
 
 /*This parameter, k_L^\nu, determines the dynamic range of the neighborhood
    masking.*/
@@ -1907,13 +1908,17 @@
 
 /*Scratch space used by the psychovisual model.*/
 struct oc_psych_ctx{
+  /*The CSF filter pipeline stage.*/
+  oc_enc_pipe_stage    csf_pipe;
+  /*The spatial masking pipeline stage.*/
+  oc_enc_pipe_stage    mask_pipe;
   /*A single row of CSF-filtered coefficients (after vertical filtering).
     This has OC_CSF_FILTER_SZ_MAX-1 blocks of padding on each side.*/
   oc_weight_block     *csf_row;
   /*OC_MASK_WINDOW_SZ_MAX rows of CSF-filtered coefficients (after both
-     vertical and horizontal filtering).
+     vertical and horizontal filtering) for each plane.
     This has no padding on either side.*/
-  oc_weight_block    **csf_weights;
+  oc_weight_block    **csf_weights[3];
   /*The half-width of the CSF filters in the current filter banks, rounded
      down.*/
   int                  csf_filter_sizes[5][8];
@@ -1930,11 +1935,13 @@
   /*Sums of CSF-weighted masking values in each masking group over
      OC_MASK_WINDOW_SZ_MAX*2-1 rows, with OC_MASK_WINDOW_SZ_MAX-1 blocks of
      padding on either side.*/
-  oc_mask_block      **mask_groups;
+  oc_mask_block      **mask_groups[3];
   /*Individual CSF-weighted masking values over OC_MASK_WINDOW_SZ_MAX rows.*/
-  oc_weight_block    **mask_weights;
+  oc_weight_block    **mask_weights[3];
   /*The encoding context.*/
   oc_enc_ctx          *enc;
+  /*The vertical delay for CSF filtering.*/
+  int                  vsize_max[3];
 };
 
 
@@ -2043,6 +2050,383 @@
   }
 }
 
+/*Prepares the CSF filter stage for a new frame.
+  The CSF filter banks are interpolated for the current qscale, the progress
+   counter of every plane is reset, and the largest vertical filter half-width
+   of each plane is cached in vsize_max[].
+  _stage: The CSF filter pipeline stage.
+  Return: The result of starting the next stage, or 0 if there is none.*/
+static int oc_csf_pipe_start(oc_enc_pipe_stage *_stage){
+  oc_enc_pipe_stage *next;
+  oc_psych_ctx      *psych;
+  int                pli;
+  psych=_stage->enc->vbr->psych;
+  oc_psych_csf_filters_interpolate(psych,_stage->enc->vbr->qscale);
+  for(pli=0;pli<3;pli++){
+    int *sizes;
+    int  vmax;
+    int  i;
+    _stage->y_procd[pli]=0;
+    /*The vertical filter bank of plane pli is stored at index 2*pli.*/
+    sizes=psych->csf_filter_sizes[pli<<1];
+    /*Find the number of rows we have to perform DCTs in advance.*/
+    vmax=0;
+    for(i=0;i<8;i++)vmax=OC_MAXI(vmax,sizes[i]);
+    psych->vsize_max[pli]=vmax;
+  }
+  next=_stage->next;
+  if(next==NULL)return 0;
+  return (*next->pipe_start)(next);
+}
+
+/*Runs the CSF filter stage on any fragment rows that have become available.
+  Each complete fragment row is filtered vertically into csf_row, then
+   horizontally into the last row of the plane's csf_weights window, and the
+   masking weights and masking group sums of that row are computed.
+  The next stage, if any, is invoked after every filtered row.
+  _stage:   The CSF filter pipeline stage.
+  _y_avail: The number of pixel rows available in each plane (presumably the
+             rows whose DCT coefficients are ready; confirm against the
+             caller).
+  Return: 0 on success, or the negative value returned by the next stage.*/
+static int oc_csf_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  oc_psych_ctx *psych;
+  int           pli;
+  psych=_stage->enc->vbr->psych;
+  for(pli=0;pli<3;pli++){
+    int y_procd;
+    int y_avail;
+    /*Compute how far we can get in complete fragment rows.*/
+    y_procd=_stage->y_procd[pli];
+    /*Add a vsize_max[pli] delay.
+      The vertical filter below reads up to that many fragment rows beneath
+       the row being filtered.*/
+    y_avail=(_y_avail[pli]&~7)-(psych->vsize_max[pli]<<3);
+    /*Perform CSF filtering on any newly available rows.*/
+    while(y_avail>y_procd){
+      oc_fragment_plane    *fplane;
+      oc_fragment          *frags;
+      oc_fragment_enc_info *efrags;
+      oc_weight_block      *maskw_row;
+      oc_weight_block      *csfw_row;
+      oc_csf_filter        *vfilters;
+      oc_csf_filter        *hfilters;
+      oc_mask_block        *maskg_row;
+      float                *csfw;
+      float                *maskw;
+      float                *maskg;
+      int                  *vsizes;
+      int                  *hsizes;
+      int                   fragi_end;
+      int                   fragx;
+      int                   cfragi0;
+      int                   cfragi;
+      int                   cfragj;
+      int                   mfragj;
+      int                   vfilti;
+      int                   hfilti;
+      int                   filti;
+      int                   ci;
+      int                   wi;
+      /*Select the filter sets we're going to use.
+        Bank 2*pli is applied vertically; the horizontal bank is 0 for plane 0
+         and 2*pli-1 for the other planes.*/
+      vfilti=pli<<1;
+      vfilters=psych->csf_filters[vfilti];
+      vsizes=psych->csf_filter_sizes[vfilti];
+      hfilti=(pli<<1)-(pli>0);
+      hfilters=psych->csf_filters[hfilti];
+      hsizes=psych->csf_filter_sizes[hfilti];
+      frags=psych->enc->state.frags;
+      efrags=psych->enc->frinfo;
+      fplane=psych->enc->state.fplanes+pli;
+      /*One past the last fragment of this plane.*/
+      fragi_end=fplane->froffset+fplane->nfrags;
+      /*The first fragment of the row being filtered.*/
+      cfragi0=fplane->froffset+(y_procd>>3)*fplane->nhfrags;
+      /*First, the vertical filter.*/
+      for(fragx=0,cfragi=cfragi0;fragx<fplane->nhfrags;fragx++,cfragi++){
+        int fragi_off;
+        /*csf_row is indexed past its left padding.*/
+        csfw=psych->csf_row[fragx+OC_CSF_FILTER_SZ_MAX-1];
+        for(ci=0;ci<64;ci++){
+          /*The vertical filter is chosen by the upper three bits of the
+             coefficient index.*/
+          filti=ci>>3;
+          csfw[ci]=vfilters[filti][0]*efrags[cfragi].dct_coeffs[ci];
+          fragi_off=fplane->nhfrags;
+          /*The filter is symmetric: tap wi is applied to the sum of the
+             fragments wi rows above and wi rows below.
+            Rows that fall outside of the plane contribute 0.*/
+          for(wi=vsizes[filti];wi>0;wi--){
+            int coeffs;
+            cfragj=cfragi-fragi_off;
+            coeffs=cfragj>=fplane->froffset?efrags[cfragj].dct_coeffs[ci]:0;
+            cfragj=cfragi+fragi_off;
+            if(cfragj<fragi_end)coeffs+=efrags[cfragj].dct_coeffs[ci];
+            csfw[ci]+=0.5F*vfilters[filti][wi]*coeffs;
+            fragi_off+=fplane->nhfrags;
+          }
+        }
+      }
+      /*Next, the horizontal filtering.
+        The results for this row are written to the last row of each of the
+         plane's sliding windows.*/
+      maskg_row=psych->mask_groups[pli][OC_MASK_WINDOW_SZ_MAX-1<<1];
+      maskw_row=psych->mask_weights[pli][OC_MASK_WINDOW_SZ_MAX-1];
+      csfw_row=psych->csf_weights[pli][OC_MASK_WINDOW_SZ_MAX-1];
+      /*Clear the group sums for the whole row, padding included, before
+         accumulating into them.*/
+      memset(maskg_row[0],0,sizeof(maskg_row[0])*(
+       fplane->nhfrags+(OC_MASK_WINDOW_SZ_MAX-1<<1)));
+      for(fragx=0,cfragi=cfragi0;fragx<fplane->nhfrags;fragx++,cfragi++){
+        csfw=csfw_row[fragx];
+        /*Fragments flagged invalid get all-zero CSF weights and add nothing
+           to the group sums.*/
+        if(frags[cfragi].invalid)memset(csfw,0,sizeof(oc_weight_block));
+        else{
+          cfragj=fragx+OC_CSF_FILTER_SZ_MAX-1;
+          mfragj=fragx+OC_MASK_WINDOW_SZ_MAX-1;
+          for(ci=0;ci<64;ci++){
+            /*The horizontal filter is chosen by the lower three bits of the
+               coefficient index.*/
+            filti=ci&7;
+            csfw[ci]=hfilters[filti][0]*psych->csf_row[cfragj][ci];
+            for(wi=hsizes[filti];wi>0;wi--){
+              csfw[ci]+=0.5F*hfilters[filti][wi]*(
+               psych->csf_row[cfragj-wi][ci]+psych->csf_row[cfragj+wi][ci]);
+            }
+            /*Only the magnitude of the filtered coefficient is kept.*/
+            csfw[ci]=OC_FABSF(csfw[ci]);
+          }
+          maskw=maskw_row[fragx];
+          maskg=maskg_row[mfragj];
+          /*Raise each AC weight to the power OC_MASK_NU and add it to the sum
+             of its masking group.
+            The DC coefficient (ci==0) is left out.*/
+          for(ci=1;ci<64;ci++){
+            maskw[ci]=OC_POWF(csfw[ci],OC_MASK_NU);
+            maskg[OC_MASK_GROUP[ci]]+=maskw[ci];
+          }
+        }
+      }
+      /*Publish our progress before handing the new row to the next stage.*/
+      y_procd+=8;
+      _stage->y_procd[pli]=y_procd;
+      if(_stage->next!=NULL){
+        int ret;
+        ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+        if(ret<0)return ret;
+      }
+    }
+  }
+  return 0;
+}
+
+/*Flushes the CSF filter stage at the end of a frame.
+  The available height of each plane is advanced past the input height by the
+   vertical filter delay, so that oc_csf_pipe_process() also filters the rows
+   it was still holding back.
+  _stage: The CSF filter pipeline stage.
+  Return: A negative value if processing failed, otherwise the result of
+           ending the next stage, or 0 if there is none.*/
+static int oc_csf_pipe_end(oc_enc_pipe_stage *_stage){
+  oc_psych_ctx *psych;
+  int           y_end[3];
+  int           pli;
+  int           ret;
+  psych=_stage->enc->vbr->psych;
+  for(pli=0;pli<3;pli++){
+    int delay;
+    delay=psych->vsize_max[pli]<<3;
+    y_end[pli]=_stage->enc->state.input[pli].height+delay;
+  }
+  ret=oc_csf_pipe_process(_stage,y_end);
+  if(ret<0)return ret;
+  if(_stage->next==NULL)return 0;
+  return (*_stage->next->pipe_end)(_stage->next);
+}
+
+/*Sets up the CSF filter stage of the pipeline.
+  The stage is bound to the encoder, given its three callbacks, and left
+   without a successor.
+  _stage: The pipeline stage to initialize.
+  _enc:   The encoding context.*/
+static void oc_csf_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->pipe_start=oc_csf_pipe_start;
+  _stage->pipe_proc=oc_csf_pipe_process;
+  _stage->pipe_end=oc_csf_pipe_end;
+  _stage->next=NULL;
+  _stage->enc=_enc;
+}
+
+
+/*Prepares the spatial masking stage for a new frame.
+  The progress counter of each plane is set OC_MASK_WINDOW_SZ_MAX-1 fragment
+   rows above the top of the image, and the mask group rows that stand in for
+   those rows are cleared.
+  The next stage is not started here.
+  _stage: The spatial masking pipeline stage.
+  Return: 0 always.*/
+static int oc_mask_pipe_start(oc_enc_pipe_stage *_stage){
+  oc_psych_ctx *psych;
+  int           pli;
+  psych=_stage->enc->vbr->psych;
+  for(pli=0;pli<3;pli++){
+    oc_fragment_plane *fplane;
+    int                rowi;
+    /*Shift first and negate afterwards: left-shifting a negative value is
+       undefined behavior in C.*/
+    _stage->y_procd[pli]=-((OC_MASK_WINDOW_SZ_MAX-1)<<3);
+    fplane=psych->enc->state.fplanes+pli;
+    /*Just clear out the mask group weights for the rows above the image.*/
+    for(rowi=OC_MASK_WINDOW_SZ_MAX-1;rowi<(OC_MASK_WINDOW_SZ_MAX-1<<1);rowi++){
+      memset(psych->mask_groups[pli][rowi][0],0,
+       sizeof(oc_mask_block)*(fplane->nhfrags+(OC_MASK_WINDOW_SZ_MAX-1<<1)));
+    }
+  }
+  return 0;
+}
+
+/*Runs the spatial masking stage on any fragment rows that have become
+   available.
+  For each complete fragment row, the masking group sums in a sliding window
+   around every fragment are combined with that fragment's CSF weights to
+   produce its per-coefficient tolerances (tols[]) and minimum qi values
+   (qi_min[]), and the minimum DC tolerance of the plane is updated.
+  The sliding windows are then advanced by one row.
+  _stage:   The spatial masking pipeline stage.
+  _y_avail: The number of pixel rows the previous stage has finished in each
+             plane.
+  Return: 0 always.*/
+static int oc_mask_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  static const int o=OC_MASK_WINDOW_SZ_MAX-1;
+  oc_psych_ctx *psych;
+  int           pli;
+  psych=_stage->enc->vbr->psych;
+  for(pli=0;pli<3;pli++){
+    int y_procd;
+    int y_avail;
+    /*Compute how far we can get in complete fragment rows.*/
+    y_procd=_stage->y_procd[pli];
+    /*Add an (OC_MASK_WINDOW_SZ_MAX-1) delay.*/
+    y_avail=(_y_avail[pli]&~7)-(o<<3);
+    /*Perform spatial masking on any newly available rows.*/
+    while(y_avail>y_procd){
+      oc_weight_block  *csfw_row;
+      oc_weight_block  *maskw_row;
+      oc_mask_block   **mask_groups;
+      oc_mask_block    *maskg_row;
+      int               rowi;
+      mask_groups=psych->mask_groups[pli];
+      /*While y_procd is negative the windows are still filling up, so we only
+         slide them.
+        Once it reaches 0, row o of mask_groups and row 0 of the weight
+         windows hold fragment row y_procd>>3, starting with row 0 of the
+         plane, which must be processed like any other.*/
+      if(y_procd>=0){
+        oc_fragment_plane    *fplane;
+        oc_fragment_enc_info *efrag;
+        oc_mask_block         group_sums;
+        float                *csf_offset;
+        float                *csfw;
+        float                *maskw;
+        float                 fscale;
+        float                 pscale;
+        int                   fragx;
+        int                   mfragi;
+        int                   mfragj;
+        int                   qti;
+        int                   gi;
+        int                   wi;
+        int                   wj;
+        int                   ci;
+        pscale=OC_YCbCr_SCALE[psych->enc->state.info.colorspace][pli];
+        csf_offset=psych->csf_offsets[pli];
+        /*The oldest row of each weight window belongs to the fragment row
+           being masked.*/
+        maskw_row=psych->mask_weights[pli][0];
+        csfw_row=psych->csf_weights[pli][0];
+        /*Prime the running sums with every column of the first fragment's
+           neighborhood except the right-most one, which is added inside the
+           loop below.*/
+        for(gi=0;gi<OC_MASK_NFULL_GROUPS;gi++){
+          group_sums[gi]=0;
+          for(wi=o-OC_MASK_WINDOW_SIZES[gi];wi<=o+OC_MASK_WINDOW_SIZES[gi];wi++){
+            for(wj=o;wj<o+OC_MASK_WINDOW_SIZES[gi];wj++){
+              group_sums[gi]+=mask_groups[wi][wj][gi];
+            }
+          }
+        }
+        fplane=psych->enc->state.fplanes+pli;
+        mfragi=fplane->froffset+(y_procd>>3)*fplane->nhfrags;
+        for(fragx=0;fragx<fplane->nhfrags;fragx++,mfragi++){
+          /*Add the parts of the group neighborhoods that are new.*/
+          mfragj=fragx+OC_MASK_WINDOW_SZ_MAX-1;
+          for(gi=0;gi<OC_MASK_NFULL_GROUPS;gi++){
+            wj=OC_MASK_WINDOW_SIZES[gi];
+            for(wi=o-wj;wi<=o+wj;wi++){
+              group_sums[gi]+=mask_groups[wi][mfragj+wj][gi];
+            }
+          }
+          /*These groups are so small it is not worth incremental updates.*/
+          for(;gi<OC_MASK_NGROUPS;gi++){
+            group_sums[gi]=mask_groups[o][mfragj][gi]+
+             mask_groups[o-1][mfragj][gi]+mask_groups[o+1][mfragj][gi]+
+             mask_groups[o][mfragj-1][gi]+mask_groups[o][mfragj+1][gi];
+          }
+          /*Mask the coefficients in this block.*/
+          /*An offset is added to the numerator and denominator to protect
+             against division by zero.
+            In effect, this is like a small addition of noise to the signal, but
+             it should be well below the visual threshold.
+            This is the best solution I could come up with to handle the problem
+             of 0's in the intra DCT coefficients that, because of motion
+             compensation, are not 0 in the inter DCT coefficients and thus need
+             a valid weight.
+            The DC coefficient has a different offset added than the others,
+             because the pixel values have 128 subtracted from them before the
+             DCT is performed, which offsets the DC coefficient by 4096.*/
+          efrag=psych->enc->frinfo+mfragi;
+          /*Use the fully (vertically and horizontally) filtered weights of
+             this fragment's row, not the csf_row scratch buffer, which only
+             holds the vertically filtered row most recently produced by the
+             CSF stage.*/
+          csfw=csfw_row[fragx];
+          maskw=maskw_row[fragx];
+          /*Compute the scaling value for this fragment.*/
+          fscale=pscale*efrag->imp_weight/psych->enc->vbr->qscale;
+          /*The DC coefficient is not masked.*/
+          efrag->tols[0]=(ogg_uint16_t)OC_MINI(65535,(int)(
+           (OC_CSF_DC_SHIFT+abs(efrag->dct_coeffs[0]))/
+           (fscale*(csf_offset[0]+csfw[0]))));
+          psych->enc->vbr->dc_tol_mins[pli]=(unsigned)OC_MINI(
+           psych->enc->vbr->dc_tol_mins[pli],efrag->tols[0]);
+          /*The remaining coefficients are masked.*/
+          for(ci=1;ci<64;ci++){
+            float mask;
+            gi=OC_MASK_GROUP[ci];
+            /*A coefficient does not mask itself.*/
+            mask=group_sums[gi]-maskw[ci];
+            efrag->tols[ci]=(ogg_uint16_t)OC_MINI(65535,(int)(
+             ((OC_CSF_NOISE_LEVEL+abs(efrag->dct_coeffs[ci]))*
+             (1+OC_MASK_WEIGHTS[gi]*mask))/
+             (fscale*(csf_offset[ci]+csfw[ci]))));
+          }
+          /*Select minimum qi values for each quantizer type.*/
+          for(qti=2;qti-->0;){
+            unsigned qmin;
+            int      qi_min;
+            /*This is the minimum quantizer Theora allows.
+              Don't inflate the qi unnecessarily if we have a tolerance less
+               than this.*/
+            qmin=OC_AC_QUANT_MIN[qti];
+            /*qti==1 is searched first; the search for qti==0 then starts from
+               its result.*/
+            qi_min=qti?0:efrag->qi_min[1];
+            for(ci=0;qi_min<63;qi_min++){
+              while(psych->enc->state.dequant_tables[qti][pli][qi_min][ci]<=
+               OC_MAXI(2U*efrag->tols[ci],qmin)&&++ci<64);
+              if(ci==64)break;
+            }
+            efrag->qi_min[qti]=(unsigned char)qi_min;
+          }
+  #if 0
+          /*Now undo all the work we did above and just use a constant quantizer
+             value for testing purposes.*/
+          efrag->qi_min[0]=efrag->qi_min[1]=psych->enc->state.info.quality;
+          for(ci=0;ci<64;ci++){
+            efrag->tols[ci]=OC_MINI(
+             psych->enc->state.dequant_tables[0][pli][efrag->qi_min[0]][ci],
+             psych->enc->state.dequant_tables[1][pli][efrag->qi_min[1]][ci])+
+             1>>1;
+          }
+  #endif
+          /*Remove the parts of the group neighborhoods that are old.*/
+          for(gi=0;gi<OC_MASK_NFULL_GROUPS;gi++){
+            wj=OC_MASK_WINDOW_SIZES[gi];
+            for(wi=o-wj;wi<=o+wj;wi++){
+              group_sums[gi]-=mask_groups[wi][mfragj-wj][gi];
+            }
+          }
+        }
+      }
+      /*Move the sliding windows.
+        The nice thing about these manually allocated 2D arrays is that we can
+         move rows around just by moving around the initial pointers to them,
+         not actually copying their contents.*/
+      maskg_row=mask_groups[0];
+      for(rowi=0;rowi<OC_MASK_WINDOW_SZ_MAX-1<<1;rowi++){
+        mask_groups[rowi]=mask_groups[rowi+1];
+      }
+      mask_groups[OC_MASK_WINDOW_SZ_MAX-1<<1]=maskg_row;
+      maskw_row=psych->mask_weights[pli][0];
+      csfw_row=psych->csf_weights[pli][0];
+      for(rowi=0;rowi<OC_MASK_WINDOW_SZ_MAX-1;rowi++){
+        psych->mask_weights[pli][rowi]=psych->mask_weights[pli][rowi+1];
+        psych->csf_weights[pli][rowi]=psych->csf_weights[pli][rowi+1];
+      }
+      psych->mask_weights[pli][OC_MASK_WINDOW_SZ_MAX-1]=maskw_row;
+      psych->csf_weights[pli][OC_MASK_WINDOW_SZ_MAX-1]=csfw_row;
+      y_procd+=8;
+    }
+    _stage->y_procd[pli]=y_procd;
+  }
+  return 0;
+}
+
+/*Flushes the spatial masking stage at the end of a frame and then runs the
+   next stage, if any, from start to end.
+  _stage: The spatial masking pipeline stage.
+  Return: 0 on success, or the first negative value returned by this stage or
+           the next one.*/
+static int oc_mask_pipe_end(oc_enc_pipe_stage *_stage){
+  oc_enc_pipe_stage *next;
+  oc_psych_ctx      *psych;
+  int                y_avail[3];
+  int                nflushed;
+  int                pli;
+  int                ret;
+  psych=_stage->enc->vbr->psych;
+  for(pli=0;pli<3;pli++)y_avail[pli]=_stage->enc->state.input[pli].height;
+  /*The sliding windows have to keep sliding until the last image row has
+     been masked, so pipe_process() is called once for each of the
+     OC_MASK_WINDOW_SZ_MAX-1 rows still in flight.*/
+  for(nflushed=0;nflushed<OC_MASK_WINDOW_SZ_MAX-1;nflushed++){
+    for(pli=0;pli<3;pli++){
+      int nblocks;
+      nblocks=psych->enc->state.fplanes[pli].nhfrags+
+       (OC_MASK_WINDOW_SZ_MAX-1<<1);
+      /*Rows below the image contribute no masking: zero the newest row of
+         group sums before pretending another row has arrived.*/
+      memset(psych->mask_groups[pli][OC_MASK_WINDOW_SZ_MAX-1<<1][0],0,
+       sizeof(oc_mask_block)*nblocks);
+      y_avail[pli]+=8;
+    }
+    ret=oc_mask_pipe_process(_stage,y_avail);
+    if(ret<0)return ret;
+  }
+  /*dc_tol_mins[] is only complete once every plane has been fully processed,
+     so the next stage is deferred until now.*/
+  next=_stage->next;
+  if(next==NULL)return 0;
+  ret=(*next->pipe_start)(next);
+  if(ret<0)return ret;
+  ret=(*next->pipe_proc)(next,_stage->y_procd);
+  if(ret<0)return ret;
+  return (*next->pipe_end)(next);
+}
+
+/*Sets up the spatial masking stage of the pipeline.
+  The stage is bound to the encoder, given its three callbacks, and left
+   without a successor.
+  _stage: The pipeline stage to initialize.
+  _enc:   The encoding context.*/
+static void oc_mask_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->pipe_start=oc_mask_pipe_start;
+  _stage->pipe_proc=oc_mask_pipe_process;
+  _stage->pipe_end=oc_mask_pipe_end;
+  _stage->next=NULL;
+  _stage->enc=_enc;
+}
+
+#if 0
 static void oc_psych_scan_plane(oc_psych_ctx *_psych,int _pli){
   oc_csf_filter        *vfilters;
   oc_csf_filter        *hfilters;
@@ -2081,7 +2465,7 @@
   int                   k;
   csf_offset=_psych->csf_offsets[_pli];
   /*Initialize the minimum psychovisual tolerance for the DC coefficient.*/
-   _psych->enc->dc_tol_mins[_pli]=32767;
+  _psych->enc->vbr->dc_tol_mins[_pli]=32767;
   /*Select the filter sets we're going to use.*/
   vfilti=_pli<<1;
   hfilti=_pli>0?(_pli<<1)-1:0;
@@ -2226,13 +2610,13 @@
         csfw=_psych->csf_weights[0][fragx];
         maskw=maskw_row[fragx];
         /*Compute the scaling value for this fragment.*/
-        fscale=pscale*efrag->imp_weight/_psych->enc->qscale;
+        fscale=pscale*efrag->imp_weight/_psych->enc->vbr->qscale;
         /*The DC coefficient is not masked.*/
         efrag->tols[0]=(ogg_uint16_t)OC_MINI(65535,(int)(
          (OC_CSF_DC_SHIFT+abs(efrag->dct_coeffs[0]))/
          (fscale*(csf_offset[0]+csfw[0]))));
-        _psych->enc->dc_tol_mins[_pli]=(unsigned)OC_MINI(
-         _psych->enc->dc_tol_mins[_pli],efrag->tols[0]);
+        _psych->enc->vbr->dc_tol_mins[_pli]=(unsigned)OC_MINI(
+         _psych->enc->vbr->dc_tol_mins[_pli],efrag->tols[0]);
         /*The remaining coefficients are masked.*/
         for(i=1;i<64;i++){
           float mask;
@@ -2299,38 +2683,58 @@
     _psych->csf_weights[OC_MASK_WINDOW_SZ_MAX-1]=csfw_row;
   }
 }
+#endif
 
 
+/*Allocates a psychovisual model context for the given encoder.
+  The scratch row is sized from the width of plane 0, the sliding windows are
+   allocated separately for each plane, and the CSF stage is linked to the
+   spatial masking stage.
+  NOTE(review): none of the allocation results are checked before use.
+  _enc: The encoding context.
+  Return: The newly allocated context.*/
 oc_psych_ctx *oc_psych_alloc(oc_enc_ctx *_enc){
   oc_psych_ctx *psych;
   int           nhfrags;
+  int           pli;
   nhfrags=_enc->state.fplanes[0].nhfrags;
   psych=(oc_psych_ctx *)_ogg_malloc(sizeof(*psych));
   psych->csf_row=(oc_weight_block *)_ogg_calloc(
    (nhfrags+(OC_CSF_FILTER_SZ_MAX-1<<1)),sizeof(psych->csf_row[0]));
-  psych->csf_weights=(oc_weight_block **)oc_malloc_2d(
-   OC_MASK_WINDOW_SZ_MAX,nhfrags,sizeof(psych->csf_weights[0][0]));
-  psych->mask_groups=(oc_mask_block **)oc_malloc_2d(
-   (OC_MASK_WINDOW_SZ_MAX<<1)-1,nhfrags+(OC_MASK_WINDOW_SZ_MAX-1<<1),
-   sizeof(psych->mask_groups[0][0]));
-  psych->mask_weights=(oc_weight_block **)oc_malloc_2d(
-   OC_MASK_WINDOW_SZ_MAX,nhfrags,sizeof(psych->mask_weights[0][0]));
+  for(pli=0;pli<3;pli++){
+    nhfrags=_enc->state.fplanes[pli].nhfrags;
+    psych->csf_weights[pli]=(oc_weight_block **)oc_malloc_2d(
+     OC_MASK_WINDOW_SZ_MAX,nhfrags,sizeof(oc_weight_block));
+    psych->mask_groups[pli]=(oc_mask_block **)oc_malloc_2d(
+     (OC_MASK_WINDOW_SZ_MAX<<1)-1,nhfrags+(OC_MASK_WINDOW_SZ_MAX-1<<1),
+     sizeof(oc_mask_block));
+    psych->mask_weights[pli]=(oc_weight_block **)oc_malloc_2d(
+     OC_MASK_WINDOW_SZ_MAX,nhfrags,sizeof(oc_weight_block));
+  }
   psych->enc=_enc;
+  /*Initialize our pipeline stages.*/
+  oc_csf_pipe_init(&psych->csf_pipe,_enc);
+  oc_mask_pipe_init(&psych->mask_pipe,_enc);
+  psych->csf_pipe.next=&psych->mask_pipe;
   return psych;
 }
 
+/*Frees a psychovisual model context, including its scratch row and the
+   per-plane sliding windows.
+  _psych: The context to free; NULL is accepted and ignored.*/
 void oc_psych_free(oc_psych_ctx *_psych){
   if(_psych!=NULL){
+    int pli;
     _ogg_free(_psych->csf_row);
-    oc_free_2d((void **)_psych->csf_weights);
-    oc_free_2d((void **)_psych->mask_groups);
-    oc_free_2d((void **)_psych->mask_weights);
+    for(pli=0;pli<3;pli++){
+      oc_free_2d((void **)_psych->csf_weights[pli]);
+      oc_free_2d((void **)_psych->mask_groups[pli]);
+      oc_free_2d((void **)_psych->mask_weights[pli]);
+    }
     _ogg_free(_psych);
   }
 }
 
+/*Places the psychovisual stages in front of an existing pipeline.
+  _psych: The psychovisual model context.
+  _next:  The stage to run after spatial masking.
+  Return: The CSF filter stage, which is the first psychovisual stage.*/
+oc_enc_pipe_stage *oc_psych_prepend_to_pipe(oc_psych_ctx *_psych,
+ oc_enc_pipe_stage *_next){
+  oc_enc_pipe_stage *tail;
+  oc_enc_pipe_stage *head;
+  tail=&_psych->mask_pipe;
+  head=&_psych->csf_pipe;
+  tail->next=_next;
+  return head;
+}
+
+#if 0
+/*Whole-frame scan: interpolates the CSF filters for the given contrast and
+   then scans each of the three planes in turn.
+  This function is currently compiled out by the enclosing #if 0.
+  _psych:    The psychovisual model context.
+  _contrast: The value passed to oc_psych_csf_filters_interpolate().*/
 void oc_psych_scan(oc_psych_ctx *_psych,float _contrast){
   int pli;
   oc_psych_csf_filters_interpolate(_psych,_contrast);
   for(pli=0;pli<3;pli++)oc_psych_scan_plane(_psych,pli);
 }
+#endif

Modified: experimental/derf/theora-exp/lib/psych.h
===================================================================
--- experimental/derf/theora-exp/lib/psych.h	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/psych.h	2005-09-18 00:58:06 UTC (rev 10030)
@@ -1,5 +1,6 @@
 #if !defined(_psych_H)
 # define _psych_H (1)
+# include "encvbr.h"
 
 /*The assumed screen resolution vs. viewing distance.
   This is taken to be constant under the assumption that viewers will sit

Modified: experimental/derf/theora-exp/unix/Makefile
===================================================================
--- experimental/derf/theora-exp/unix/Makefile	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/unix/Makefile	2005-09-18 00:58:06 UTC (rev 10030)
@@ -78,8 +78,10 @@
 enquant.c \
 fdct.c \
 huffenc.c \
+mcenc.c \
+encmsc.c \
+encvbr.c \
 impmap.c \
-mcenc.c \
 psych.c \
 
 LIBTHEORAENC_CHEADERS =   \
@@ -88,6 +90,7 @@
 enquant.h \
 fdct.h \
 huffenc.h \
+encvbr.h \
 psych.h \
 
 DUMP_VIDEO_CSOURCES = dump_video.c

Modified: experimental/derf/theora-exp/win32/msvc60/dump_video.dsp
===================================================================
--- experimental/derf/theora-exp/win32/msvc60/dump_video.dsp	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/win32/msvc60/dump_video.dsp	2005-09-18 00:58:06 UTC (rev 10030)
@@ -74,7 +74,7 @@
 # ADD BSC32 /nologo
 LINK32=link.exe
 # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib libpng.lib zlib.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib libpngd.lib zlibd_static.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
 
 !ENDIF 
 

Modified: experimental/derf/theora-exp/win32/msvc60/encoder_example.dsp
===================================================================
--- experimental/derf/theora-exp/win32/msvc60/encoder_example.dsp	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/win32/msvc60/encoder_example.dsp	2005-09-18 00:58:06 UTC (rev 10030)
@@ -39,9 +39,10 @@
 # PROP Use_Debug_Libraries 0
 # PROP Output_Dir "Release_encoder_example"
 # PROP Intermediate_Dir "Release_encoder_example"
+# PROP Ignore_Export_Lib 0
 # PROP Target_Dir ""
 # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /GX /O2 /I "../../../../../trunk/ogg/include" /I "../../../../../trunk/vorbis/include" /I "../../include" /I "../compatibility" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /U "OC_444_MODE" /U "OC_422_MODE" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /I "../../../../../trunk/ogg/include" /I "../../../../../trunk/vorbis/include" /I "../../include" /I "../compatibility" /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D GETOPT_API= /U "OC_444_MODE" /U "OC_422_MODE" /YX /FD /c
 # ADD BASE RSC /l 0x409 /d "NDEBUG"
 # ADD RSC /l 0x409 /d "NDEBUG"
 BSC32=bscmake.exe
@@ -65,7 +66,7 @@
 # PROP Ignore_Export_Lib 0
 # PROP Target_Dir ""
 # ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
-# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../../../../trunk/vorbis/include" /I "../../include" /I "../compatibility" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D GETOPT_API= /U "OC_444_MODE" /U "OC_422_MODE" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../../../../trunk/vorbis/include" /I "../../include" /I "../compatibility" /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D GETOPT_API= /U "OC_444_MODE" /U "OC_422_MODE" /YX /FD /GZ /c
 # ADD BASE RSC /l 0x409 /d "_DEBUG"
 # ADD RSC /l 0x409 /d "_DEBUG"
 BSC32=bscmake.exe
@@ -73,7 +74,7 @@
 # ADD BSC32 /nologo
 LINK32=link.exe
 # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib libpng.lib zlib.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib libpngd.lib zlibd_static.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
 
 !ENDIF 
 

Modified: experimental/derf/theora-exp/win32/msvc60/theorabase_static.dsp
===================================================================
--- experimental/derf/theora-exp/win32/msvc60/theorabase_static.dsp	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/win32/msvc60/theorabase_static.dsp	2005-09-18 00:58:06 UTC (rev 10030)
@@ -41,7 +41,7 @@
 # PROP Intermediate_Dir "Release_theorabase_static"
 # PROP Target_Dir ""
 # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /GX /O2 /Ob2 /I "../../../../../trunk/ogg/include" /I "../../include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /Ob2 /I "../../../../../trunk/ogg/include" /I "../../include" /D "NDEBUG" /D "WIN32" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /c
 # ADD BASE RSC /l 0x409 /d "NDEBUG"
 # ADD RSC /l 0x409 /d "NDEBUG"
 BSC32=bscmake.exe
@@ -64,7 +64,7 @@
 # PROP Intermediate_Dir "Debug_theorabase_static"
 # PROP Target_Dir ""
 # ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
-# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../include" /D "_DEBUG" /D "WIN32" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /GZ /c
 # ADD BASE RSC /l 0x409 /d "_DEBUG"
 # ADD RSC /l 0x409 /d "_DEBUG"
 BSC32=bscmake.exe
@@ -113,6 +113,10 @@
 # PROP Default_Filter "h;hpp;hxx;hm;inl"
 # Begin Source File
 
+SOURCE=..\..\include\theora\codec.h
+# End Source File
+# Begin Source File
+
 SOURCE=..\..\lib\dct.h
 # End Source File
 # Begin Source File

Modified: experimental/derf/theora-exp/win32/msvc60/theoradec_static.dsp
===================================================================
--- experimental/derf/theora-exp/win32/msvc60/theoradec_static.dsp	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/win32/msvc60/theoradec_static.dsp	2005-09-18 00:58:06 UTC (rev 10030)
@@ -105,6 +105,10 @@
 # PROP Default_Filter "h;hpp;hxx;hm;inl"
 # Begin Source File
 
+SOURCE=..\..\include\theora\codec.h
+# End Source File
+# Begin Source File
+
 SOURCE=..\..\lib\decint.h
 # End Source File
 # Begin Source File
@@ -137,7 +141,7 @@
 # End Source File
 # Begin Source File
 
-SOURCE=..\..\include\theora\theora.h
+SOURCE=..\..\include\theora\theoradec.h
 # End Source File
 # End Group
 # End Target

Modified: experimental/derf/theora-exp/win32/msvc60/theoraenc_static.dsp
===================================================================
--- experimental/derf/theora-exp/win32/msvc60/theoraenc_static.dsp	2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/win32/msvc60/theoraenc_static.dsp	2005-09-18 00:58:06 UTC (rev 10030)
@@ -41,7 +41,7 @@
 # PROP Intermediate_Dir "Release_theoraenc_static"
 # PROP Target_Dir ""
 # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /GX /O2 /Ob2 /I "../../../../../trunk/ogg/include" /I "../../include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /Ob2 /I "../../../../../trunk/ogg/include" /I "../../include" /D "NDEBUG" /D "WIN32" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /c
 # ADD BASE RSC /l 0x409 /d "NDEBUG"
 # ADD RSC /l 0x409 /d "NDEBUG"
 BSC32=bscmake.exe
@@ -64,7 +64,7 @@
 # PROP Intermediate_Dir "Debug_theoraenc_static"
 # PROP Target_Dir ""
 # ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
-# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../include" /D "_DEBUG" /D "WIN32" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /GZ /c
 # ADD BASE RSC /l 0x409 /d "_DEBUG"
 # ADD RSC /l 0x409 /d "_DEBUG"
 BSC32=bscmake.exe
@@ -93,10 +93,18 @@
 # End Source File
 # Begin Source File
 
+SOURCE=..\..\lib\encmsc.c
+# End Source File
+# Begin Source File
+
 SOURCE=..\..\lib\encode.c
 # End Source File
 # Begin Source File
 
+SOURCE=..\..\lib\encvbr.c
+# End Source File
+# Begin Source File
+
 SOURCE=..\..\lib\enquant.c
 # End Source File
 # Begin Source File
@@ -125,6 +133,10 @@
 # PROP Default_Filter "h;hpp;hxx;hm;inl"
 # Begin Source File
 
+SOURCE=..\..\include\theora\codec.h
+# End Source File
+# Begin Source File
+
 SOURCE=..\..\lib\dct.h
 # End Source File
 # Begin Source File
@@ -133,6 +145,10 @@
 # End Source File
 # Begin Source File
 
+SOURCE=..\..\lib\encvbr.h
+# End Source File
+# Begin Source File
+
 SOURCE=..\..\lib\enquant.h
 # End Source File
 # Begin Source File
@@ -165,7 +181,7 @@
 # End Source File
 # Begin Source File
 
-SOURCE=..\..\include\theora\theora.h
+SOURCE=..\..\include\theora\theoraenc.h
 # End Source File
 # End Group
 # End Target