[xiph-commits] r10044 - experimental/derf/theora-exp/lib
tterribe at svn.xiph.org
Thu Sep 22 13:24:22 PDT 2005
Author: tterribe
Date: 2005-09-22 13:24:19 -0700 (Thu, 22 Sep 2005)
New Revision: 10044
Modified:
experimental/derf/theora-exp/lib/encmsc.c
experimental/derf/theora-exp/lib/encvbr.c
experimental/derf/theora-exp/lib/encvbr.h
Log:
Fix an error return code (OC_EINVAL is already negative, no need to negate it),
rename a VBR function I forgot to, and set the svn:eol-style on the new files.
Modified: experimental/derf/theora-exp/lib/encmsc.c
===================================================================
--- experimental/derf/theora-exp/lib/encmsc.c 2005-09-22 20:21:40 UTC (rev 10043)
+++ experimental/derf/theora-exp/lib/encmsc.c 2005-09-22 20:24:19 UTC (rev 10044)
@@ -1,234 +1,234 @@
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <ogg/ogg.h>
-#include "encint.h"
-
-
-
-/*The VLC code used for mode schemes 0-6.*/
-static const theora_huff_code OC_MODE_CODESA[OC_NMODES]={
- {0x00,1},{0x02,2},{0x06,3},{0x0E,4},{0x1E,5},{0x3E,6},{0x7E,7},{0x7F,7}
-};
-
-/*The CLC code used for mode scheme 7.*/
-static const theora_huff_code OC_MODE_CODESB[OC_NMODES]={
- {0x00,3},{0x01,3},{0x02,3},{0x03,3},{0x04,3},{0x05,3},{0x06,3},{0x07,3}
-};
-
-
-
-/*Initialize the mode scheme chooser.
- This need only be called once per encoder.
- This is probably the best place to describe the various schemes Theora uses
- to encode macro block modes.
- There are 8 possible schemes.
- Schemes 0-6 use a highly unbalanced Huffman code to code each of the modes.
- The same set of Huffman codes is used for each of these 7 schemes, but the
- mode assigned to each code varies.
- Schemes 1-6 have a fixed mapping from Huffman code to MB mode, while scheme
- 0 writes a custom mapping to the bitstream before all the modes.
- Finally, scheme 7 just encodes each mode directly in 3 bits.
- Be warned that the number assigned to each mode is slightly different in the
- bitstream than in this implementation, so a translation needs to be done.
-
- Mode name: Source-code index; Bit-stream index:
- OC_MODE_INTRA 0 1
- OC_MODE_INTER_NOMV 1 0
- OC_MODE_INTER_MV 2 2
- OC_MODE_INTER_MV_LAST 3 3
- OC_MODE_INTER_MV_LAST2 4 4
- OC_MODE_INTER_MV_FOUR 5 6
- OC_MODE_GOLDEN_NOMV 6 7
- OC_MODE_GOLDEN_MV 7 5
-
- The bit stream indices come from the constants assigned to each mode in the
- original VP3 source.*/
-void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
- int msi;
- _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
- for(msi=0;msi<7;msi++){
- _chooser->mode_codes[msi]=OC_MODE_CODESA;
- _chooser->mode_ranks[msi+1]=OC_MODE_SCHEMES[msi];
- }
- _chooser->mode_codes[7]=OC_MODE_CODESB;
-}
-
-/*Reset the mode scheme chooser.
- This needs to be called once for each frame, including the first.*/
-void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
- int i;
- memset(_chooser->mode_counts,0,sizeof(_chooser->mode_counts));
- /*Scheme 0 starts with 24 bits to store the mode list in.*/
- _chooser->scheme_bits[0]=24;
- memset(_chooser->scheme_bits+1,0,7*sizeof(_chooser->scheme_bits[1]));
- for(i=0;i<8;i++){
- /*Scheme 7 should always start first, and scheme 0 should always start
- last.*/
- _chooser->scheme_list[i]=7-i;
- _chooser->scheme0_list[i]=_chooser->scheme0_ranks[i]=i;
- }
-}
-
-/*This is the real purpose of this data structure: not actually selecting a
- mode scheme, but estimating the cost of coding a given mode given all the
- modes selected so far.
- This is done via opportunity cost: the cost is defined as the number of bits
- required to encode all the modes selected so far including the current one
- using the best possible scheme, minus the number of bits required to encode
- all the modes selected so far not including the current one using the best
- possible scheme.
- The computational expense of doing this probably makes it overkill.
- Just be happy we take a greedy approach instead of trying to solve the
- global mode-selection problem (which is NP-hard).
- _mode: The mode to determine the cost of.
- Return: The number of bits required to code this mode.*/
-int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,int _mode){
- int scheme0;
- int scheme1;
- int si;
- int scheme_bits;
- int best_bits;
- int mode_bits;
- scheme0=_chooser->scheme_list[0];
- scheme1=_chooser->scheme_list[1];
- best_bits=_chooser->scheme_bits[scheme0];
- mode_bits=_chooser->mode_codes[scheme0][
- _chooser->mode_ranks[scheme0][_mode]].nbits;
- /*Typical case: If the difference between the best scheme and the next best
- is greater than 6 bits, then adding just one mode cannot change which
- scheme we use.*/
- if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits;
- /*Otherwise, check to see if adding this mode selects a different scheme
- as the best.*/
- si=1;
- best_bits+=mode_bits;
- do{
- scheme1=_chooser->scheme_list[si];
- /*For any scheme except 0, we can just use the bit cost of the mode's rank
- in that scheme.*/
- if(scheme1!=0){
- scheme_bits=_chooser->scheme_bits[scheme1]+
- _chooser->mode_codes[scheme1][
- _chooser->mode_ranks[scheme1][_mode]].nbits;
- }
- else{
- int ri;
- /*For scheme 0, incrementing the mode count could potentially change the
- mode's rank.
- Find the index where the mode would be moved to in the optimal list,
- and use its bit cost instead of the one for the mode's current
- position in the list.*/
- for(ri=_chooser->scheme0_ranks[_mode];ri>0&&
- _chooser->mode_counts[_mode]>=
- _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--);
- scheme_bits=_chooser->scheme_bits[0]+OC_MODE_CODESA[ri].nbits;
- }
- if(scheme_bits<best_bits)best_bits=scheme_bits;
- si++;
- }
- while(si<8&&_chooser->scheme_bits[_chooser->scheme_list[si]]-
- _chooser->scheme_bits[scheme0]<=6);
- return best_bits-_chooser->scheme_bits[scheme0];
-}
-
-/*Update the mode counts and per-scheme bit counts and re-order the scheme
- lists once a mode has been selected.
- _mode: The mode that was chosen.*/
-void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
- int _mode){
- int ri;
- int si;
- _chooser->mode_counts[_mode]++;
- /*Re-order the scheme0 mode list if necessary.*/
- for(ri=_chooser->scheme0_ranks[_mode];ri>0;ri--){
- int pmode;
- pmode=_chooser->scheme0_list[ri-1];
- if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mode])break;
- _chooser->scheme0_ranks[pmode]++;
- _chooser->scheme0_list[ri]=pmode;
- }
- _chooser->scheme0_ranks[_mode]=ri;
- _chooser->scheme0_list[ri]=_mode;
- /*Now add the bit cost for the mode to each scheme.*/
- for(si=0;si<8;si++){
- _chooser->scheme_bits[si]+=
- _chooser->mode_codes[si][_chooser->mode_ranks[si][_mode]].nbits;
- }
- /*Finally, re-order the list of schemes.*/
- for(si=1;si<8;si++){
- int sj;
- int scheme0;
- int bits0;
- scheme0=_chooser->scheme_list[si];
- bits0=_chooser->scheme_bits[scheme0];
- sj=si;
- do{
- int scheme1;
- scheme1=_chooser->scheme_list[sj-1];
- if(bits0>=_chooser->scheme_bits[scheme1])break;
- _chooser->scheme_list[sj]=scheme1;
- }
- while(--sj>0);
- _chooser->scheme_list[sj]=scheme0;
- }
-}
-
-/*Update the count for each mode by the given amounts, and then re-rank the
- schemes appropriately.
- This allows fewer (e.g. 1) updates to be done, at the cost of a more
- expensive update.
- _mode_counts: The amount to add to each mode count.*/
-void oc_mode_scheme_chooser_add(oc_mode_scheme_chooser *_chooser,
- int _mode_counts[OC_NMODES]){
- int mi;
- int mj;
- int ri;
- int rj;
- int si;
- for(mi=0;mi<OC_NMODES;mi++){
- _chooser->mode_counts[mi]+=_mode_counts[mi];
- }
- /*Re-order the scheme0 mode list if necessary.*/
- for(ri=1;ri<OC_NMODES;ri++){
- mi=_chooser->scheme0_list[ri];
- rj=ri;
- do{
- mj=_chooser->scheme0_list[rj-1];
- if(_chooser->mode_counts[mj]>=_chooser->mode_counts[mi])break;
- _chooser->scheme0_ranks[mj]++;
- _chooser->scheme0_list[rj]=mj;
- }
- while(--rj>0);
- _chooser->scheme0_ranks[mi]=rj;
- _chooser->scheme0_list[rj]=mi;
- }
- /*Now recompute the bit cost for each scheme.*/
- for(si=0;si<8;si++){
- _chooser->scheme_bits[si]=0;
- for(mi=0;mi<8;mi++){
- _chooser->scheme_bits[si]+=
- _chooser->mode_codes[si][_chooser->mode_ranks[si][mi]].nbits*
- _chooser->mode_counts[mi];
- }
- }
- /*Scheme 0 starts with 24 bits to store the mode list in.*/
- _chooser->scheme_bits[0]+=24;
- /*Finally, re-order the list of schemes.*/
- for(si=1;si<8;si++){
- int sj;
- int scheme0;
- int bits0;
- scheme0=_chooser->scheme_list[si];
- bits0=_chooser->scheme_bits[scheme0];
- sj=si;
- do{
- int scheme1;
- scheme1=_chooser->scheme_list[sj-1];
- if(bits0>=_chooser->scheme_bits[scheme1])break;
- _chooser->scheme_list[sj]=scheme1;
- }
- while(--sj>0);
- _chooser->scheme_list[sj]=scheme0;
- }
-}
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ogg/ogg.h>
+#include "encint.h"
+
+
+
+/*The VLC code used for mode schemes 0-6.*/
+static const theora_huff_code OC_MODE_CODESA[OC_NMODES]={
+ {0x00,1},{0x02,2},{0x06,3},{0x0E,4},{0x1E,5},{0x3E,6},{0x7E,7},{0x7F,7}
+};
+
+/*The CLC code used for mode scheme 7.*/
+static const theora_huff_code OC_MODE_CODESB[OC_NMODES]={
+ {0x00,3},{0x01,3},{0x02,3},{0x03,3},{0x04,3},{0x05,3},{0x06,3},{0x07,3}
+};
+
+
+
+/*Initialize the mode scheme chooser.
+ This need only be called once per encoder.
+ This is probably the best place to describe the various schemes Theora uses
+ to encode macro block modes.
+ There are 8 possible schemes.
+ Schemes 0-6 use a highly unbalanced Huffman code to code each of the modes.
+ The same set of Huffman codes is used for each of these 7 schemes, but the
+ mode assigned to each code varies.
+ Schemes 1-6 have a fixed mapping from Huffman code to MB mode, while scheme
+ 0 writes a custom mapping to the bitstream before all the modes.
+ Finally, scheme 7 just encodes each mode directly in 3 bits.
+ Be warned that the number assigned to each mode is slightly different in the
+ bitstream than in this implementation, so a translation needs to be done.
+
+ Mode name: Source-code index; Bit-stream index:
+ OC_MODE_INTRA 0 1
+ OC_MODE_INTER_NOMV 1 0
+ OC_MODE_INTER_MV 2 2
+ OC_MODE_INTER_MV_LAST 3 3
+ OC_MODE_INTER_MV_LAST2 4 4
+ OC_MODE_INTER_MV_FOUR 5 6
+ OC_MODE_GOLDEN_NOMV 6 7
+ OC_MODE_GOLDEN_MV 7 5
+
+ The bit stream indices come from the constants assigned to each mode in the
+ original VP3 source.*/
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
+ int msi;
+ _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
+ for(msi=0;msi<7;msi++){
+ _chooser->mode_codes[msi]=OC_MODE_CODESA;
+ _chooser->mode_ranks[msi+1]=OC_MODE_SCHEMES[msi];
+ }
+ _chooser->mode_codes[7]=OC_MODE_CODESB;
+}
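
The translation between source-code and bitstream mode indices described
in the comment above fits in a single lookup table.  The standalone sketch
below is not part of the library; the array name is illustrative and the
values are copied straight from that table.

    #include <stdio.h>

    /*Bitstream index for each source-code mode index, per the table above.*/
    static const int MODE_BITSTREAM_IDX[8]={1,0,2,3,4,6,7,5};

    int main(void){
      int srci;
      for(srci=0;srci<8;srci++){
        printf("source-code mode %d -> bitstream mode %d\n",
         srci,MODE_BITSTREAM_IDX[srci]);
      }
      return 0;
    }
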
+
+/*Reset the mode scheme chooser.
+ This needs to be called once for each frame, including the first.*/
+void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
+ int i;
+ memset(_chooser->mode_counts,0,sizeof(_chooser->mode_counts));
+ /*Scheme 0 starts with 24 bits to store the mode list in.*/
+ _chooser->scheme_bits[0]=24;
+ memset(_chooser->scheme_bits+1,0,7*sizeof(_chooser->scheme_bits[1]));
+ for(i=0;i<8;i++){
+ /*Scheme 7 should always start first, and scheme 0 should always start
+ last.*/
+ _chooser->scheme_list[i]=7-i;
+ _chooser->scheme0_list[i]=_chooser->scheme0_ranks[i]=i;
+ }
+}
+
+/*This is the real purpose of this data structure: not actually selecting a
+ mode scheme, but estimating the cost of coding a given mode given all the
+ modes selected so far.
+ This is done via opportunity cost: the cost is defined as the number of bits
+ required to encode all the modes selected so far including the current one
+ using the best possible scheme, minus the number of bits required to encode
+ all the modes selected so far not including the current one using the best
+ possible scheme.
+ The computational expense of doing this probably makes it overkill.
+ Just be happy we take a greedy approach instead of trying to solve the
+ global mode-selection problem (which is NP-hard).
+ _mode: The mode to determine the cost of.
+ Return: The number of bits required to code this mode.*/
+int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,int _mode){
+ int scheme0;
+ int scheme1;
+ int si;
+ int scheme_bits;
+ int best_bits;
+ int mode_bits;
+ scheme0=_chooser->scheme_list[0];
+ scheme1=_chooser->scheme_list[1];
+ best_bits=_chooser->scheme_bits[scheme0];
+ mode_bits=_chooser->mode_codes[scheme0][
+ _chooser->mode_ranks[scheme0][_mode]].nbits;
+ /*Typical case: If the difference between the best scheme and the next best
+ is greater than 6 bits, then adding just one mode cannot change which
+ scheme we use.*/
+ if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits;
+ /*Otherwise, check to see if adding this mode selects a different scheme
+ as the best.*/
+ si=1;
+ best_bits+=mode_bits;
+ do{
+ scheme1=_chooser->scheme_list[si];
+ /*For any scheme except 0, we can just use the bit cost of the mode's rank
+ in that scheme.*/
+ if(scheme1!=0){
+ scheme_bits=_chooser->scheme_bits[scheme1]+
+ _chooser->mode_codes[scheme1][
+ _chooser->mode_ranks[scheme1][_mode]].nbits;
+ }
+ else{
+ int ri;
+ /*For scheme 0, incrementing the mode count could potentially change the
+ mode's rank.
+ Find the index where the mode would be moved to in the optimal list,
+ and use its bit cost instead of the one for the mode's current
+ position in the list.*/
+ for(ri=_chooser->scheme0_ranks[_mode];ri>0&&
+ _chooser->mode_counts[_mode]>=
+ _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--);
+ scheme_bits=_chooser->scheme_bits[0]+OC_MODE_CODESA[ri].nbits;
+ }
+ if(scheme_bits<best_bits)best_bits=scheme_bits;
+ si++;
+ }
+ while(si<8&&_chooser->scheme_bits[_chooser->scheme_list[si]]-
+ _chooser->scheme_bits[scheme0]<=6);
+ return best_bits-_chooser->scheme_bits[scheme0];
+}
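
The opportunity-cost definition in the comment above reduces to a single
subtraction: the best running total after adding the mode, minus the best
running total before.  A toy standalone illustration with made-up numbers
(none of these names exist in the library):

    #include <stdio.h>

    int main(void){
      /*Running bit totals for two schemes (hypothetical values).*/
      int scheme_bits[2]={100,103};
      /*Bits the new mode would add under each scheme (hypothetical values).*/
      int mode_bits[2]={5,1};
      int best_before;
      int best_after;
      best_before=scheme_bits[0]<scheme_bits[1]?scheme_bits[0]:scheme_bits[1];
      scheme_bits[0]+=mode_bits[0];
      scheme_bits[1]+=mode_bits[1];
      best_after=scheme_bits[0]<scheme_bits[1]?scheme_bits[0]:scheme_bits[1];
      /*104-100=4 bits: cheaper than the 5 bits the previously best scheme
         would have charged, because adding the mode changes which scheme is
         best.*/
      printf("opportunity cost: %d bits\n",best_after-best_before);
      return 0;
    }
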
+
+/*Update the mode counts and per-scheme bit counts and re-order the scheme
+ lists once a mode has been selected.
+ _mode: The mode that was chosen.*/
+void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mode){
+ int ri;
+ int si;
+ _chooser->mode_counts[_mode]++;
+ /*Re-order the scheme0 mode list if necessary.*/
+ for(ri=_chooser->scheme0_ranks[_mode];ri>0;ri--){
+ int pmode;
+ pmode=_chooser->scheme0_list[ri-1];
+ if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mode])break;
+ _chooser->scheme0_ranks[pmode]++;
+ _chooser->scheme0_list[ri]=pmode;
+ }
+ _chooser->scheme0_ranks[_mode]=ri;
+ _chooser->scheme0_list[ri]=_mode;
+ /*Now add the bit cost for the mode to each scheme.*/
+ for(si=0;si<8;si++){
+ _chooser->scheme_bits[si]+=
+ _chooser->mode_codes[si][_chooser->mode_ranks[si][_mode]].nbits;
+ }
+ /*Finally, re-order the list of schemes.*/
+ for(si=1;si<8;si++){
+ int sj;
+ int scheme0;
+ int bits0;
+ scheme0=_chooser->scheme_list[si];
+ bits0=_chooser->scheme_bits[scheme0];
+ sj=si;
+ do{
+ int scheme1;
+ scheme1=_chooser->scheme_list[sj-1];
+ if(bits0>=_chooser->scheme_bits[scheme1])break;
+ _chooser->scheme_list[sj]=scheme1;
+ }
+ while(--sj>0);
+ _chooser->scheme_list[sj]=scheme0;
+ }
+}
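
A usage sketch (not code from this commit) showing how the reset, cost,
and update calls fit together when estimating mode-label bits, assuming the
oc_mode_scheme_chooser declarations are visible via "encint.h"; the list of
already-chosen modes is supplied by the hypothetical caller:

    #include "encint.h"

    /*Estimate the mode-label bits for a list of already-chosen MB modes.*/
    static int count_mode_label_bits(oc_mode_scheme_chooser *_chooser,
     const int *_modes,int _nmbs){
      int total_bits;
      int mbi;
      total_bits=0;
      oc_mode_scheme_chooser_reset(_chooser);
      for(mbi=0;mbi<_nmbs;mbi++){
        /*The cost must be queried before the update that accounts for the
           mode.*/
        total_bits+=oc_mode_scheme_chooser_cost(_chooser,_modes[mbi]);
        oc_mode_scheme_chooser_update(_chooser,_modes[mbi]);
      }
      return total_bits;
    }
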
+
+/*Update the count for each mode by the given amounts, and then re-rank the
+ schemes appropriately.
+ This allows fewer (e.g. 1) updates to be done, at the cost of a more
+ expensive update.
+ _mode_counts: The amount to add to each mode count.*/
+void oc_mode_scheme_chooser_add(oc_mode_scheme_chooser *_chooser,
+ int _mode_counts[OC_NMODES]){
+ int mi;
+ int mj;
+ int ri;
+ int rj;
+ int si;
+ for(mi=0;mi<OC_NMODES;mi++){
+ _chooser->mode_counts[mi]+=_mode_counts[mi];
+ }
+ /*Re-order the scheme0 mode list if necessary.*/
+ for(ri=1;ri<OC_NMODES;ri++){
+ mi=_chooser->scheme0_list[ri];
+ rj=ri;
+ do{
+ mj=_chooser->scheme0_list[rj-1];
+ if(_chooser->mode_counts[mj]>=_chooser->mode_counts[mi])break;
+ _chooser->scheme0_ranks[mj]++;
+ _chooser->scheme0_list[rj]=mj;
+ }
+ while(--rj>0);
+ _chooser->scheme0_ranks[mi]=rj;
+ _chooser->scheme0_list[rj]=mi;
+ }
+ /*Now recompute the bit cost for each scheme.*/
+ for(si=0;si<8;si++){
+ _chooser->scheme_bits[si]=0;
+ for(mi=0;mi<8;mi++){
+ _chooser->scheme_bits[si]+=
+ _chooser->mode_codes[si][_chooser->mode_ranks[si][mi]].nbits*
+ _chooser->mode_counts[mi];
+ }
+ }
+ /*Scheme 0 starts with 24 bits to store the mode list in.*/
+ _chooser->scheme_bits[0]+=24;
+ /*Finally, re-order the list of schemes.*/
+ for(si=1;si<8;si++){
+ int sj;
+ int scheme0;
+ int bits0;
+ scheme0=_chooser->scheme_list[si];
+ bits0=_chooser->scheme_bits[scheme0];
+ sj=si;
+ do{
+ int scheme1;
+ scheme1=_chooser->scheme_list[sj-1];
+ if(bits0>=_chooser->scheme_bits[scheme1])break;
+ _chooser->scheme_list[sj]=scheme1;
+ }
+ while(--sj>0);
+ _chooser->scheme_list[sj]=scheme0;
+ }
+}
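
As a rough illustration of what the recomputed scheme_bits totals mean,
the standalone sketch below compares scheme 7 (a flat 3 bits per mode, per
OC_MODE_CODESB) against a scheme-0-style coding that assigns the
OC_MODE_CODESA code lengths to modes in decreasing order of frequency and
pays the 24-bit mode-list header.  The counts are made up.

    #include <stdio.h>

    int main(void){
      /*Hypothetical per-mode counts, already sorted by decreasing
         frequency.*/
      static const int COUNTS[8]={40,10,5,5,2,1,1,0};
      /*Code lengths from OC_MODE_CODESA (schemes 0-6) and OC_MODE_CODESB
         (scheme 7).*/
      static const int VLC_NBITS[8]={1,2,3,4,5,6,7,7};
      static const int CLC_NBITS[8]={3,3,3,3,3,3,3,3};
      int vlc_bits;
      int clc_bits;
      int mi;
      vlc_bits=24;/*Scheme 0 pays 24 bits up front for the mode list.*/
      clc_bits=0;
      for(mi=0;mi<8;mi++){
        vlc_bits+=VLC_NBITS[mi]*COUNTS[mi];
        clc_bits+=CLC_NBITS[mi]*COUNTS[mi];
      }
      printf("scheme 0: %d bits, scheme 7: %d bits\n",vlc_bits,clc_bits);
      return 0;
    }
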
Property changes on: experimental/derf/theora-exp/lib/encmsc.c
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: experimental/derf/theora-exp/lib/encvbr.c
===================================================================
--- experimental/derf/theora-exp/lib/encvbr.c 2005-09-22 20:21:40 UTC (rev 10043)
+++ experimental/derf/theora-exp/lib/encvbr.c 2005-09-22 20:24:19 UTC (rev 10044)
@@ -1,1416 +1,1416 @@
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <ogg/ogg.h>
-#include "encvbr.h"
-#include "fdct.h"
-
-
-
-/*Returns the number of bits used by the given motion vector with the VLC
- motion vector codes (as opposed to the CLC codes, which always use 12 bits).
- _dx: The X component of the vector, in half-pel units.
- _dy: The Y component of the vector, in half-pel units.
- Return: The number of bits required to store the vector with the VLC codes.*/
-static int oc_mvbitsa(int _dx,int _dy){
- return OC_MV_CODES[0][_dx+31].nbits+OC_MV_CODES[0][_dy+31].nbits;
-}
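
The function above gives the VLC cost of one vector; the CLC alternative
is always 12 bits per vector, and the mode-decision code later in this file
keeps both running totals and picks whichever scheme is cheaper for the
whole frame.  A standalone sketch of that comparison with hypothetical
per-vector VLC costs (the real values come from OC_MV_CODES):

    #include <stdio.h>

    int main(void){
      /*Hypothetical per-vector VLC bit counts for one frame; the real
         values depend on the vector components.*/
      static const int VLC_BITS[5]={7,9,14,8,21};
      int mvbitsa;
      int mvbitsb;
      int mvi;
      mvbitsa=mvbitsb=0;
      for(mvi=0;mvi<5;mvi++){
        mvbitsa+=VLC_BITS[mvi];
        mvbitsb+=12;/*The CLC codes always use 12 bits per vector.*/
      }
      printf("VLC scheme: %d bits, CLC scheme: %d bits\n",mvbitsa,mvbitsb);
      return 0;
    }
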
-
-
-
-/*Select the set of quantizers to use for the current frame for each possible
- frame type (intra or inter).
- This does not assign a quantizer to each fragment, as that depends on the
- quantizer type used and thus is done during mode decision.*/
-static void oc_enc_vbr_quant_sel_quality(oc_enc_ctx *_enc,int _intra_only){
- unsigned qmax[2][3];
- int qi_min[2];
- int qi_max[2];
- int fti;
- int qti;
- int pli;
- int dc_qi[2];
- qi_min[0]=_enc->vbr->cfg.kf_qi_min;
- qi_min[1]=_enc->vbr->cfg.df_qi_min;
- qi_max[0]=_enc->vbr->cfg.kf_qi_max;
- qi_max[1]=_enc->vbr->cfg.df_qi_max;
- /*The first quantizer value is used for DC coefficients.
- Select one that allows us to meet our quality requirements.*/
- for(qti=0;qti<1+!_intra_only;qti++)for(pli=0;pli<3;pli++){
- qmax[qti][pli]=OC_MAXI(2U*_enc->vbr->dc_tol_mins[pli],
- OC_DC_QUANT_MIN[qti]);
- }
- /*For intra frames...(containing just INTRA fragments)*/
- for(dc_qi[0]=qi_min[0];dc_qi[0]<qi_max[0];dc_qi[0]++){
- if(_enc->state.dequant_tables[0][0][dc_qi[0]][0]<=qmax[0][0]&&
- _enc->state.dequant_tables[0][1][dc_qi[0]][0]<=qmax[0][1]&&
- _enc->state.dequant_tables[0][2][dc_qi[0]][0]<=qmax[0][2]){
- break;
- }
- }
- /*For inter frames...(containing both INTER and INTRA fragments)*/
- if(!_intra_only){
- for(dc_qi[1]=OC_CLAMPI(qi_min[1],dc_qi[0],qi_max[1]);dc_qi[1]<qi_max[1];
- dc_qi[1]++){
- if(_enc->state.dequant_tables[1][0][dc_qi[1]][0]<=qmax[1][0]&&
- _enc->state.dequant_tables[1][1][dc_qi[1]][0]<=qmax[1][1]&&
- _enc->state.dequant_tables[1][2][dc_qi[1]][0]<=qmax[1][2]){
- break;
- }
- }
- }
- /*Now we select a full qi list for each frame type.*/
- for(fti=0;fti<1+!_intra_only;fti++){
- oc_fragment_enc_info *efrag;
- int ncoded_fragis;
- int nqis[64];
- int qi;
- int qi0;
- int qi1;
- int qi2;
- /*Here we count up the number of fragments that can use each qi value.
- Unless we know this is an intra frame, we don't know what quantizer type
- will be used for each fragment, so we just count both of them.*/
- memset(nqis,0,sizeof(nqis));
- if(fti){
- int *coded_fragi;
- int *coded_fragi_end;
- coded_fragi=_enc->state.coded_fragis;
- ncoded_fragis=_enc->state.ncoded_fragis[0]+
- _enc->state.ncoded_fragis[1]+_enc->state.ncoded_fragis[2];
- coded_fragi_end=coded_fragi+ncoded_fragis;
- for(;coded_fragi<coded_fragi_end;coded_fragi++){
- efrag=_enc->frinfo+*coded_fragi;
- for(qti=0;qti<2;qti++)nqis[efrag->qi_min[qti]]++;
- }
- }
- else{
- oc_fragment_enc_info *efrag_end;
- ncoded_fragis=_enc->state.nfrags;
- efrag=_enc->frinfo;
- efrag_end=efrag+ncoded_fragis;
- for(;efrag<efrag_end;efrag++)nqis[efrag->qi_min[0]]++;
- }
- /*We'll now choose the qi values that divide the fragments into equally
- sized groups, or as close as we can make it.
- We account for the DC coefficients by adding an extra amount to the qi
- value they require.
- Since there are usually many more DC coefficients coded than any one AC
- coefficient, we use 1/8 of the number of fragments, instead of 1/64.*/
- nqis[dc_qi[fti]]+=(ncoded_fragis<<fti)+7>>3;
- /*Convert this into a moment table.*/
- for(qi=63;qi-->0;)nqis[qi]+=nqis[qi+1];
-    /*If we have a lower limit on the QI range, promote any fragments with a
-      smaller QI, to ensure they're counted.*/
- if(qi_min[fti]>0)nqis[qi_min[fti]]=nqis[0];
- /*Select our first quantizer.*/
- for(qi0=qi_max[fti]+1;qi0-->qi_min[fti]&&nqis[qi0]<=0;);
- for(qi1=qi0-1;qi1>=qi_min[fti]&&nqis[qi1]<=nqis[qi0];qi1--);
- /*Test to make sure there are even two unique quantizers.*/
- if(qi1>=qi_min[fti]){
- ogg_int64_t best_metric;
- ogg_int64_t metric;
- int best_qi1;
- int best_qi2;
- int qii;
- for(qi2=qi1-1;qi2>=qi_min[fti]&&nqis[qi2]<=nqis[qi1];qi2--);
- /*Test to make sure there are three unique quantizers.*/
- if(qi2>=0){
- best_metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
- (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
- best_qi1=qi1;
- best_qi2=qi2;
- for(;nqis[qi1]<nqis[1];qi1--){
- for(qi2=qi1-1;nqis[qi2]<nqis[0];qi2--){
- metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
- (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
- if(metric>=best_metric){
- best_qi1=qi1;
- best_qi2=qi2;
- best_metric=metric;
- }
- }
- }
- _enc->qis[fti][0]=qi0;
- _enc->qis[fti][1]=best_qi1;
- _enc->qis[fti][2]=best_qi2;
- _enc->nqis[fti]=3;
- }
- else{
- best_metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
- best_qi1=qi1;
- if(qi1>0)for(qi1--;nqis[qi1]<nqis[0];qi1--){
- metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
- if(metric>best_metric){
- best_qi1=qi1;
- best_metric=metric;
- }
- }
- _enc->qis[fti][0]=qi0;
- _enc->qis[fti][1]=best_qi1;
- _enc->nqis[fti]=2;
- }
- /*Right now qis[0] is the largest.
- We want to use the smallest that is still large enough for our DC
- coefficients.*/
- for(qii=1;qii<_enc->nqis[fti];qii++)if(_enc->qis[fti][qii]>=dc_qi[fti]){
- qi0=_enc->qis[fti][0];
- _enc->qis[fti][0]=_enc->qis[fti][qii];
- _enc->qis[fti][qii]=qi0;
- }
- }
- else{
- _enc->qis[fti][0]=qi0;
- _enc->nqis[fti]=1;
- }
- /*If we're in VP3 compatibility mode, just use the first quantizer.*/
- if(_enc->vp3_compatible)_enc->nqis[fti]=1;
- }
-}
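
The qi-list selection above turns the per-qi fragment counts into a
suffix-sum ("moment") table and then maximizes the product of the group
sizes, which is largest when the groups are closest to equal.  A reduced
standalone sketch over 8 qi values with made-up counts (the real search
also fixes qi0 first and restricts the range; this only shows the metric):

    #include <stdio.h>

    #define NQI 8

    int main(void){
      /*nqis[qi] starts as the number of fragments whose minimum qi is qi.*/
      int nqis[NQI]={1,4,0,6,3,0,2,0};
      long best_metric;
      int best_qi1;
      int best_qi2;
      int qi;
      int qi1;
      int qi2;
      /*Convert to a suffix-sum table: nqis[qi] becomes the number of
         fragments whose minimum qi is at least qi.*/
      for(qi=NQI-1;qi-->0;)nqis[qi]+=nqis[qi+1];
      /*Pick the two split points that maximize the product of the three
         group sizes (brute force here).*/
      best_metric=-1;
      best_qi1=best_qi2=0;
      for(qi1=1;qi1<NQI-1;qi1++)for(qi2=0;qi2<qi1;qi2++){
        long metric;
        metric=(long)(nqis[0]-nqis[qi2+1])*(nqis[qi2+1]-nqis[qi1+1])*
         nqis[qi1+1];
        if(metric>best_metric){
          best_metric=metric;
          best_qi1=qi1;
          best_qi2=qi2;
        }
      }
      printf("splits at qi %d and %d (metric %ld)\n",
       best_qi2,best_qi1,best_metric);
      return 0;
    }
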
-
-/*Mark all fragments as coded and in OC_MODE_INTRA.
- This also selects a quantizer value for each fragment and builds up the
- coded fragment list (in coded order) and clears the uncoded fragment list.
- It does not update the coded macro block list, as that is not used when
- coding INTRA frames.*/
-static void oc_enc_vbr_mark_all_intra(oc_enc_ctx *_enc){
- oc_sb *sb;
- oc_sb *sb_end;
- int pli;
- int qii;
- int ncoded_fragis;
- int prev_ncoded_fragis;
- /*Select the quantizer list for INTRA frames.*/
- _enc->state.nqis=_enc->nqis[OC_INTRA_FRAME];
- for(qii=0;qii<_enc->state.nqis;qii++){
- _enc->state.qis[qii]=_enc->qis[OC_INTRA_FRAME][qii];
- }
- prev_ncoded_fragis=ncoded_fragis=0;
- sb=sb_end=_enc->state.sbs;
- for(pli=0;pli<3;pli++){
- const oc_fragment_plane *fplane;
- fplane=_enc->state.fplanes+pli;
- sb_end+=fplane->nsbs;
- for(;sb<sb_end;sb++){
- int quadi;
- for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
- int bi;
- for(bi=0;bi<4;bi++)if(sb->map[quadi][bi]>=0){
- oc_fragment_enc_info *efrag;
- oc_fragment *frag;
- int fragi;
- int best_qii;
- fragi=sb->map[quadi][bi];
- frag=_enc->state.frags+fragi;
- frag->coded=1;
- frag->mbmode=OC_MODE_INTRA;
- efrag=_enc->frinfo+fragi;
- best_qii=0;
- for(qii=1;qii<_enc->state.nqis;qii++){
- if(efrag->qi_min[0]<=_enc->state.qis[qii]&&
- (_enc->state.qis[best_qii]<efrag->qi_min[0]||
- _enc->state.qis[qii]<_enc->state.qis[best_qii])){
- best_qii=qii;
- }
- }
- efrag->qii=(unsigned char)best_qii;
- frag->qi=_enc->state.qis[best_qii];
- _enc->state.coded_fragis[ncoded_fragis++]=fragi;
-#if defined(OC_BITRATE_STATS)
- /*Compute the error function used for intra mode fragments.
- This function can only use information known at mode decision time, and
- so excludes the DC component.
- TODO: Separate this out somewhere more useful.*/
- {
- oc_fragment_enc_info *efrag;
- int ci;
- int eerror;
- efrag=_enc->frinfo+fragi;
- eerror=0;
- for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
- efrag->eerror=eerror;
- }
-#endif
- }
- }
- }
- _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
- prev_ncoded_fragis=ncoded_fragis;
- _enc->state.nuncoded_fragis[pli]=0;
- }
- _enc->ncoded_frags=ncoded_fragis;
-}
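
The per-fragment qii selection in the loop above picks, from the frame's
short qi list, the smallest entry that still meets the fragment's minimum
qi, falling back to the first entry when none qualifies.  A standalone
sketch of just that selection with made-up values:

    #include <stdio.h>

    /*Return the index of the smallest listed qi that is at least _qi_min,
       or 0 if no list entry qualifies.*/
    static int select_qii(const int *_qis,int _nqis,int _qi_min){
      int best_qii;
      int qii;
      best_qii=0;
      for(qii=1;qii<_nqis;qii++){
        if(_qi_min<=_qis[qii]&&
         (_qis[best_qii]<_qi_min||_qis[qii]<_qis[best_qii])){
          best_qii=qii;
        }
      }
      return best_qii;
    }

    int main(void){
      static const int QIS[3]={50,30,40};/*Hypothetical frame qi list.*/
      printf("qi_min 35 -> qii %d\n",select_qii(QIS,3,35));
      printf("qi_min 55 -> qii %d\n",select_qii(QIS,3,55));
      return 0;
    }
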
-
-
-
-/*Quantize and predict the DC coefficients.
- This is done in a separate step because the prediction of DC coefficients
- occurs in image order, not in the Hilbert-curve order, unlike the rest of
- the encoding process.*/
-static void oc_enc_vbr_quant_dc(oc_enc_ctx *_enc){
- oc_fragment_enc_info *efrag;
- oc_fragment *frag;
- int pli;
- frag=_enc->state.frags;
- efrag=_enc->frinfo;
- for(pli=0;pli<3;pli++){
- oc_fragment_plane *fplane;
- unsigned fquant;
- unsigned iquant;
- int pred_last[3];
- int fragx;
- int fragy;
- pred_last[OC_FRAME_GOLD]=0;
- pred_last[OC_FRAME_PREV]=0;
- pred_last[OC_FRAME_SELF]=0;
- fplane=_enc->state.fplanes+pli;
- for(fragy=0;fragy<fplane->nvfrags;fragy++){
- for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++,efrag++){
- int qc_pred;
- int qc;
- if(!frag->coded)continue;
- qc_pred=oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
- /*Fragments outside the displayable region must still be coded in key
- frames.
- To minimize wasted bits, just use the predicted DC value.
- TODO: We might do a better job in the lower-left hand corner by
- propagating over the DC value of the first actually coded fragment,
- but for the moment this is not done.*/
- if(frag->invalid)qc=0;
- else{
- int c;
- int c_abs;
- int qti;
- /*We now center the DC coefficient range around the predicted value
- and perform token bits optimization based on the HVS-determined
- tolerance range.
- For more details, see oc_enc_vbr_frag_quant_tokenize().*/
- qti=frag->mbmode!=OC_MODE_INTRA;
- iquant=_enc->state.dequant_tables[qti][pli][_enc->state.qis[0]][0];
- c=efrag->dct_coeffs[0]-qc_pred*iquant;
- c_abs=abs(c);
- if(c_abs<=efrag->tols[0])qc=0;
- else{
- int qc_signed[2];
- int qc_max;
- int qc_min;
- int qc_offs;
- int c_sign;
- int c_min;
- int c_recon;
- int cati;
- fquant=_enc->enquant_tables[qti][pli][_enc->state.qis[0]][0];
- qc_max=(ogg_int32_t)c_abs*fquant+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
- c_sign=c<0;
- c_recon=(qc_max-1)*iquant;
- c_min=OC_MAXI(0,c_abs-efrag->tols[0]);
- for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=iquant;
- if(qc_min<3+OC_NDCT_VAL_CAT2_SIZE)qc=qc_min;
- else{
- qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
- for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
- cati++){
- qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
- }
- qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
- }
- qc_signed[0]=qc;
- qc_signed[1]=-qc;
- qc=qc_signed[c_sign];
- }
- }
- pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc=qc+qc_pred;
- efrag->dct_coeffs[0]=(ogg_int16_t)qc;
- }
- }
- }
-}
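
The DC path above re-centers the coefficient on the predicted value before
applying the same tolerance test used for AC coefficients: if the residual
is within the HVS tolerance, the quantized DC is simply the prediction.  A
standalone sketch with made-up numbers:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void){
      int dc=117;     /*Unquantized DC coefficient.*/
      int qc_pred=7;  /*Predicted quantized DC from neighboring fragments.*/
      int iquant=16;  /*DC dequantization step.*/
      int tol=8;      /*HVS tolerance for the DC coefficient.*/
      int c;
      c=dc-qc_pred*iquant;
      /*|117-112|=5 is within the tolerance, so the residual is coded as 0
         and the reconstructed DC is just the prediction.*/
      if(abs(c)<=tol)printf("qc=0, reconstructed DC=%d\n",qc_pred*iquant);
      else printf("code a nonzero residual for c=%d\n",c);
      return 0;
    }
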
-
-/*Quantize and tokenize the given fragment.
- _efrag: The encoder information for the fragment to quantize.
- _qcoeffs: The quantized coefficients, in zig-zag order.
- _fquant: The forward quantization matrix to use.
- _iquant: The inverse quantization matrix to use.
- Return: The number of coefficients before any final zero run.*/
-static int oc_enc_vbr_frag_quant_tokenize(oc_enc_ctx *_enc,
- oc_fragment_enc_info *_efrag,ogg_int16_t _qcoeffs[64],
- const ogg_uint16_t _fquant[64],const ogg_uint16_t _iquant[64]){
- int zzi;
- int zrun;
- int qc;
- int qc_offs;
- int c_sign;
- int cati;
- int tli;
- /*The DC coefficient is already quantized (it had to be for DC prediction).
- Here we just tokenize it.*/
- if(_efrag->dct_coeffs[0]){
- qc=abs(_efrag->dct_coeffs[0]);
- c_sign=_efrag->dct_coeffs[0]<0;
- switch(qc){
- case 1:{
- _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
- (unsigned char)(OC_ONE_TOKEN+c_sign);
- }break;
- case 2:{
- _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
- (unsigned char)(OC_TWO_TOKEN+c_sign);
- }break;
- default:{
- if(qc-3<OC_NDCT_VAL_CAT2_SIZE){
- _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
- (unsigned char)(OC_DCT_VAL_CAT2+qc-3);
- _enc->extra_bits[0][_enc->nextra_bits[0]++]=(ogg_uint16_t)c_sign;
- }
- else{
- qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
- for(cati=0;qc>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];cati++){
- qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
- }
- _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
- (unsigned char)(OC_DCT_VAL_CAT3+cati);
- _enc->extra_bits[0][_enc->nextra_bits[0]++]=
- (ogg_uint16_t)((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
- }
- }
- }
- zrun=0;
- }
- else zrun=1;
- /*Now we quantize and tokenize each AC coefficient.*/
- for(zzi=1;zzi<64;zzi++){
- int qc_signed[2];
- int qc_max;
- int qc_min;
- int c_sign;
- int c_abs;
- int c_min;
- int c_recon;
- int ci;
- ci=OC_FZIG_ZAG[zzi];
- c_abs=abs(_efrag->dct_coeffs[ci]);
- /*Best case: we can encode this as a zero.*/
- if(c_abs<=_efrag->tols[ci]){
- zrun++;
- _qcoeffs[zzi]=0;
- }
- else{
- c_sign=_efrag->dct_coeffs[ci]<0;
- /*qc_max is the most accurate quantized value.
- This is the largest possible (absolute) value we will use.*/
- qc_max=(ogg_int32_t)c_abs*_fquant[ci]+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
- /*qc_min is the smallest possible (by absolute value) quantized value
- whose dequantized value is within the HVS-determined tolerance
- range.*/
- /*TODO: qc_min could be computed by a division (we do not want to allow
- the rounding errors that are possible with the mul+shift quantization
- used for qc_max), which would allow qc_max to be calculated only if
- needed below.
- Is this faster?
- Who knows.*/
- c_recon=(qc_max-1)*_iquant[ci];
- c_min=c_abs-_efrag->tols[ci];
- for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=_iquant[ci];
- /*We now proceed to find a token that is as close to qc_max as possible,
- but does not use any more bits than would be required for qc_min.
- The general assumption we make is that encoding a value closer to 0
- always uses fewer bits.
- qc_min can still reach 0 here despite the test above, if the quantizer
- value is larger than the tolerance (which can happen for very small
- tolerances; the quantizer value has a minimum it cannot go below).*/
- if(qc_min==0){
- zrun++;
- _qcoeffs[zzi]=0;
- }
- else{
- /*If we have an outstanding zero run, code it now.*/
- if(zrun>0){
- /*The zero run tokens appear on the list for the first zero in the
- run.*/
- tli=zzi-zrun;
- /*Second assumption: coding a combined run/value token always uses
- fewer bits than coding them separately.*/
- /*CAT1 run/value tokens: the value is 1.*/
- if(qc_min==1&&zrun<=17){
- if(zrun<=5){
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- (unsigned char)(OC_DCT_RUN_CAT1A+(zrun-1));
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)c_sign;
- }
- else if(zrun<=9){
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_RUN_CAT1B;
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)((c_sign<<2)+zrun-6);
- }
- else{
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_RUN_CAT1C;
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)((c_sign<<3)+zrun-10);
- }
- qc_signed[0]=1;
- qc_signed[1]=-1;
- _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
- zrun=0;
- /*Skip coding the DCT value below.*/
- continue;
- }
- /*CAT2 run/value tokens: the value is 2-3.*/
- else if(qc_min<=3&&zrun<=3){
- if(zrun==1){
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_RUN_CAT2A;
- qc=OC_MINI(3,qc_max);
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)((c_sign<<1)+qc-2);
- }
- else{
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_RUN_CAT2B;
- qc=OC_MINI(3,qc_max);
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)((c_sign<<2)+(qc-2<<1)+zrun-2);
- }
- qc_signed[0]=qc;
- qc_signed[1]=-qc;
- _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
- zrun=0;
- /*Skip coding the DCT value below.*/
- continue;
- }
- /*The run is too long or the quantized value too large: code them
- separately.*/
- else{
- /*This is stupid: non-short ZRL tokens are never used for run
- values less than 9, but codewords are reserved for them,
- wasting bits.
- Yes, yes, this would've meant a non-constant number of extra
- bits for this token, but even so.*/
- if(zrun<=8){
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_SHORT_ZRL_TOKEN;
- }
- else{
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_ZRL_TOKEN;
- }
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)(zrun-1);
- zrun=0;
- }
- }
- /*No zero run, or the run and the qc value are being coded
- separately.*/
- switch(qc_min){
- case 1:{
- _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
- (unsigned char)(OC_ONE_TOKEN+c_sign);
- _qcoeffs[zzi]=(ogg_int16_t)((-c_sign<<1)+1);
- }break;
- case 2:{
- _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
- (unsigned char)(OC_TWO_TOKEN+c_sign);
- _qcoeffs[zzi]=(ogg_int16_t)((-c_sign<<2)+2);
- }break;
- default:{
- if(qc_min-3<OC_NDCT_VAL_CAT2_SIZE){
- _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
- (unsigned char)(OC_DCT_VAL_CAT2+qc_min-3);
- _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=
- (ogg_uint16_t)c_sign;
- qc_signed[0]=qc_min;
- qc_signed[1]=-qc_min;
- _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
- }
- else{
- qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
- for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
- cati++){
- qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
- }
- /*qc_min can be encoded in this category.
- Since all DCT values in the category use the same number of
- bits, we encode the closest value to qc_max.
- This is either qc_max itself, if it is in the category's
- range, or the largest value in the category.*/
- qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
- qc_signed[0]=qc;
- qc_signed[1]=-qc;
- _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
- _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
- (unsigned char)(OC_DCT_VAL_CAT3+cati);
- _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=(ogg_uint16_t)
- ((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
- }
- }
- }
- }
- }
- }
- /*If there's a trailing zero run, code an EOB token.*/
- if(zrun>0){
- int old_tok;
- int toki;
- int ebi;
- tli=64-zrun;
- toki=_enc->ndct_tokens[tli]-1;
- if(toki>=0)old_tok=_enc->dct_tokens[tli][toki];
- else old_tok=-1;
- /*Try to extend an EOB run.*/
- switch(old_tok){
- case OC_DCT_EOB1_TOKEN:
- case OC_DCT_EOB2_TOKEN:{
- _enc->dct_tokens[tli][toki]++;
- }break;
- case OC_DCT_EOB3_TOKEN:{
- _enc->dct_tokens[tli][toki]++;
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=0;
- }break;
- case OC_DCT_REPEAT_RUN0_TOKEN:{
- ebi=_enc->nextra_bits[tli]-1;
- if(_enc->extra_bits[tli][ebi]<3)_enc->extra_bits[tli][ebi]++;
- else{
- _enc->dct_tokens[tli][toki]++;
- _enc->extra_bits[tli][ebi]=0;
- }
- }break;
- case OC_DCT_REPEAT_RUN1_TOKEN:{
- ebi=_enc->nextra_bits[tli]-1;
- if(_enc->extra_bits[tli][ebi]<7)_enc->extra_bits[tli][ebi]++;
- else{
- _enc->dct_tokens[tli][toki]++;
- _enc->extra_bits[tli][ebi]=0;
- }
- }break;
- case OC_DCT_REPEAT_RUN2_TOKEN:{
- ebi=_enc->nextra_bits[tli]-1;
- if(_enc->extra_bits[tli][ebi]<15)_enc->extra_bits[tli][ebi]++;
- else{
- _enc->dct_tokens[tli][toki]++;
- /*Again stupid: we could encode runs up to 4127, but inexplicably
- they don't subtract the bottom of the range here, so we can only
- go to 4095 (unless we want to change the spec to deal with
- wrap-around).*/
- _enc->extra_bits[tli][ebi]=32;
- }
- }break;
- case OC_DCT_REPEAT_RUN3_TOKEN:{
- ebi=_enc->nextra_bits[tli]-1;
- if(_enc->extra_bits[tli][ebi]<4095){
- _enc->extra_bits[tli][ebi]++;
- break;
- }
- /*else fall through.*/
- }
- /*Start a new EOB run.*/
- default:{
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=OC_DCT_EOB1_TOKEN;
- }
- }
- }
- /*Return the number of coefficients before the final zero run.*/
- return 64-zrun;
-}
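
The heart of the tokenizer above is the qc_max/qc_min pair: qc_max is the
ordinary rounded quantizer output, while qc_min is the smallest magnitude
whose reconstruction still lands inside the tolerance band, which is what
allows a cheaper token to be chosen.  A standalone sketch with made-up
values and simplified rounding constants:

    #include <stdio.h>

    int main(void){
      int c_abs=100;   /*Magnitude of the AC coefficient.*/
      int iquant=12;   /*Dequantization step.*/
      int tol=30;      /*HVS tolerance.*/
      int qc_max;
      int qc_min;
      int c_recon;
      int c_min;
      qc_max=(c_abs+(iquant>>1))/iquant;  /*Ordinary rounding: 8.*/
      c_min=c_abs-tol;                    /*Any reconstruction >=70 is OK.*/
      c_recon=(qc_max-1)*iquant;
      for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=iquant;
      /*qc_max=8, qc_min=6: 6*12=72 still reconstructs within 30 of 100, so
         a smaller (cheaper) token category can be used.*/
      printf("qc_max=%d qc_min=%d\n",qc_max,qc_min);
      return 0;
    }
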
-
-static void oc_enc_vbr_residual_tokenize(oc_enc_ctx *_enc){
- int *coded_fragi;
- int *coded_fragi_end;
- int pli;
- int zzi;
- /*Clear any existing DCT tokens.*/
- for(zzi=0;zzi<64;zzi++){
- _enc->ndct_tokens[zzi]=_enc->nextra_bits[zzi]=0;
- _enc->extra_bits_offs[zzi]=0;
- }
- coded_fragi_end=coded_fragi=_enc->state.coded_fragis;
- for(pli=0;pli<3;pli++){
- memcpy(_enc->dct_token_offs[pli],_enc->ndct_tokens,
- sizeof(_enc->dct_token_offs[pli]));
- coded_fragi_end+=_enc->state.ncoded_fragis[pli];
- for(;coded_fragi<coded_fragi_end;coded_fragi++){
- oc_quant_table *iquants;
- oc_fragment *frag;
- oc_fragment_enc_info *efrag;
- ogg_int16_t qcoeffs[64];
- int fragi;
- int qti;
- int nnzc;
- fragi=*coded_fragi;
- frag=_enc->state.frags+fragi;
- efrag=_enc->frinfo+fragi;
- qti=frag->mbmode!=OC_MODE_INTRA;
- iquants=_enc->state.dequant_tables[qti][pli];
- nnzc=oc_enc_vbr_frag_quant_tokenize(_enc,efrag,qcoeffs,
- _enc->enquant_tables[qti][pli][frag->qi],iquants[frag->qi]);
- /*While we're here and things are in cache, reconstruct the quantized
- fragment.*/
- oc_state_frag_recon(&_enc->state,frag,pli,qcoeffs,nnzc,nnzc,
- iquants[_enc->state.qis[0]][0],iquants[frag->qi]);
- }
- }
- /*Merge the final EOB run of one coefficient list with the start of the
- next, if possible.*/
- for(zzi=1;zzi<64;zzi++){
- static const int OC_EOB_RANGE[OC_NDCT_EOB_TOKEN_MAX]={1,1,1,4,8,16,4096};
- static const int OC_EOB_OFFS[OC_NDCT_EOB_TOKEN_MAX]={1,2,3,4,8,16,0};
- int old_tok1;
- int old_tok2;
- int old_eb1;
- int old_eb2;
- int new_tok;
- int toki;
- int zzj;
- int ebi;
- int runl;
- /*Make sure this coefficient has tokens at all.*/
- if(_enc->ndct_tokens[zzi]<=0)continue;
- /*Ensure the first token is an EOB run.*/
- old_tok2=_enc->dct_tokens[zzi][0];
- if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue;
- /*Search for a previous coefficient that has any tokens at all.*/
- old_tok1=OC_NDCT_EOB_TOKEN_MAX;
- zzj=zzi-1;
- do{
- toki=_enc->ndct_tokens[zzj]-1;
- if(toki>=_enc->dct_token_offs[0][zzj]){
- old_tok1=_enc->dct_tokens[zzj][toki];
- break;
- }
- }
- while(zzj-->0);
- /*Ensure its last token was an EOB run.*/
- if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue;
- /*Pull off the associated extra bits, if any, and decode the runs.*/
- ebi=_enc->nextra_bits[zzj];
- old_eb1=OC_DCT_TOKEN_EXTRA_BITS[old_tok1]?_enc->extra_bits[zzj][--ebi]:0;
- old_eb2=OC_DCT_TOKEN_EXTRA_BITS[old_tok2]?_enc->extra_bits[zzi][0]:0;
- runl=OC_EOB_OFFS[old_tok1]+old_eb1+OC_EOB_OFFS[old_tok2]+old_eb2;
- /*We can't possibly combine these into one run.
- It might be possible to split them more optimally, but we'll just leave
- them as is.*/
- if(runl>=4096)continue;
- /*We CAN combine them into one run.*/
- for(new_tok=OC_DCT_EOB1_TOKEN;
- runl-OC_EOB_OFFS[new_tok]>=OC_EOB_RANGE[new_tok];new_tok++);
- /*toki is always initialized.
- If your compiler thinks otherwise, it is dumb.*/
- _enc->dct_tokens[zzj][toki]=(unsigned char)new_tok;
- /*Update the two token lists.*/
- if(OC_DCT_TOKEN_EXTRA_BITS[new_tok]){
- _enc->extra_bits[zzj][ebi++]=(ogg_uint16_t)(
- runl-OC_EOB_OFFS[new_tok]);
- }
- _enc->nextra_bits[zzj]=ebi;
- _enc->dct_token_offs[0][zzi]++;
- /*Note: We don't bother to update the offsets for planes 1 and 2 if
- planes 0 or 1 don't have any tokens.
- This turns out not to matter due to the way we use the offsets later.*/
- if(OC_DCT_TOKEN_EXTRA_BITS[old_tok2])_enc->extra_bits_offs[zzi]++;
- }
-}
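
The EOB-run merge at the end of the function above sums two adjacent runs
and picks the smallest token whose range covers the total.  The standalone
sketch below uses the OC_EOB_RANGE/OC_EOB_OFFS values from the source and
assumes the EOB tokens are numbered 0 through 6 in the same order as those
tables:

    #include <stdio.h>

    int main(void){
      static const int OC_EOB_RANGE[7]={1,1,1,4,8,16,4096};
      static const int OC_EOB_OFFS[7]={1,2,3,4,8,16,0};
      int tok1=3;/*An EOB run of 4+eb1 blocks...*/
      int eb1=2; /*...so 6 blocks.*/
      int tok2=0;/*An EOB run of exactly 1 block.*/
      int eb2=0;
      int runl;
      int new_tok;
      runl=OC_EOB_OFFS[tok1]+eb1+OC_EOB_OFFS[tok2]+eb2;
      for(new_tok=0;runl-OC_EOB_OFFS[new_tok]>=OC_EOB_RANGE[new_tok];
       new_tok++);
      printf("combined run of %d -> token %d, extra bits %d\n",
       runl,new_tok,runl-OC_EOB_OFFS[new_tok]);
      return 0;
    }
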
-
-/*Marks each fragment as coded or not, based on the coefficient-level
- thresholds computed in the psychovisual stage.
-  The MB modes of the fragments are not set, as they will be computed in
- oc_enc_choose_mbmodes().
- This also builds up the coded fragment and uncoded fragment lists.
- The coded MB list is not built up.
- That is done during mode decision.*/
-static void oc_enc_mark_coded(oc_enc_ctx *_enc){
- oc_sb *sb;
- oc_sb *sb_end;
- int pli;
- int bli;
- int ncoded_fragis;
- int prev_ncoded_fragis;
- int nuncoded_fragis;
- int prev_nuncoded_fragis;
- _enc->nblock_coded_flags=bli=0;
- prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0;
- sb=sb_end=_enc->state.sbs;
- for(pli=0;pli<3;pli++){
- const oc_fragment_plane *fplane;
- int ystride;
- int prev_refi;
- fplane=_enc->state.fplanes+pli;
- sb_end+=fplane->nsbs;
- prev_refi=_enc->state.ref_frame_idx[OC_FRAME_PREV];
- ystride=_enc->state.ref_frame_bufs[prev_refi][pli].ystride;
- for(;sb<sb_end;sb++){
- int quadi;
- sb->coded_fully=1;
- sb->coded_partially=0;
- for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
- int bi;
- for(bi=0;bi<4;bi++){
- int fragi;
- fragi=sb->map[quadi][bi];
- if(fragi>=0){
- oc_fragment *frag;
- int flag;
- frag=_enc->state.frags+fragi;
- if(frag->invalid){
- frag->coded=0;
- *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
- }
- else{
- oc_fragment_enc_info *efrag;
- ogg_int16_t dct_buf[64];
- int ci;
- /*Check to see if the fragment can be skipped.
- It is assumed that a skipped fragment always takes fewer bits
- than a coded fragment, though this may not necessarily be true.
- A single skipped fragment could take up to 34 bits to encode
- its location in the RLE scheme Theora uses */
- oc_frag_intra_fdct(frag,dct_buf,ystride,prev_refi);
- efrag=_enc->frinfo+fragi;
- /*The comparison against OC_DC_QUANT_MIN and OC_AC_QUANT_MIN
- ensures we mark a fragment as skipped if it would be quantized
- to all zeros in OC_MODE_INTER_NOMV.
- These minimum quantizers represent the maximum quality the
- format is capable of, and can be larger than our tolerances.
- The minimum for INTER modes is twice the minimum for INTRA
- modes, so technically if the tolerances are below this
- threshold, we might be able to do a better job representing
- this fragment by coding it in INTRA mode.
- But the number of extra bits required to do that would be
- ridiculous, so we give up our devotion to minimum quality just
- this once.
-
- Note: OC_DC_QUANT_MIN[0] should actually be
- OC_DC_QUANT_MIN[1]>>1, but in this case those are
- equivalent.*/
- ci=0;
- if((unsigned)abs(dct_buf[0]-efrag->dct_coeffs[0])<=
- OC_MAXI(efrag->tols[0],OC_DC_QUANT_MIN[0])){
- for(ci++;ci<64;ci++){
- if((unsigned)abs(dct_buf[ci]-efrag->dct_coeffs[ci])>
- OC_MAXI(efrag->tols[ci],OC_AC_QUANT_MIN[0])){
- break;
- }
- }
- }
- if(ci>=64){
- frag->coded=0;
- *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
- }
- else{
- frag->coded=1;
- _enc->state.coded_fragis[ncoded_fragis++]=fragi;
- }
- }
- flag=frag->coded;
- sb->coded_fully&=flag;
- sb->coded_partially|=flag;
- _enc->block_coded_flags[bli++]=(char)flag;
- }
- }
- }
- /*If this is a partially coded super block, keep the entries just added
- to the code block flag list.*/
- if(!sb->coded_fully&&sb->coded_partially){
- _enc->nblock_coded_flags=bli;
- }
- /*Otherwise, discard these entries from the list, as they are
- implicit.*/
- else{
- sb->coded_partially=0;
- bli=_enc->nblock_coded_flags;
- }
- }
- _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
- prev_ncoded_fragis=ncoded_fragis;
- _enc->state.nuncoded_fragis[pli]=nuncoded_fragis-prev_nuncoded_fragis;
- prev_nuncoded_fragis=nuncoded_fragis;
- }
- _enc->ncoded_frags=ncoded_fragis;
-}
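
The skip test above marks a fragment as not coded when every coefficient
of its INTER_NOMV residual stays within the larger of the HVS tolerance and
the format's minimum quantizer step.  A reduced standalone sketch over a
handful of coefficients with made-up values:

    #include <stdio.h>
    #include <stdlib.h>

    #define NCOEFFS 4

    int main(void){
      /*Hypothetical DCT of the predictor (previous frame, no MV) and of the
         input fragment, plus per-coefficient tolerances.*/
      static const int PRED_DCT[NCOEFFS]={220,-14,6,0};
      static const int FRAG_DCT[NCOEFFS]={223,-10,2,1};
      static const int TOLS[NCOEFFS]={8,6,5,5};
      int quant_min=4;/*Hypothetical minimum quantizer step.*/
      int ci;
      for(ci=0;ci<NCOEFFS;ci++){
        int max_tol;
        max_tol=TOLS[ci]>quant_min?TOLS[ci]:quant_min;
        if(abs(PRED_DCT[ci]-FRAG_DCT[ci])>max_tol)break;
      }
      if(ci>=NCOEFFS)printf("fragment can be skipped\n");
      else printf("fragment must be coded (coefficient %d)\n",ci);
      return 0;
    }
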
-
-/*Selects an appropriate coding mode for each macro block.
- A mode is chosen for the macro blocks with at least one coded fragment.
- A bit cost estimate for coding the frame with the selected modes is made,
- and a similar estimate is made for coding the frame as a key frame.
- These estimates are used to select the optimal frame type.
- Return: The frame type to encode with: OC_INTER_FRAME or OC_INTRA_FRAME.*/
-static int oc_enc_choose_mbmodes(oc_enc_ctx *_enc){
- oc_set_chroma_mvs_func set_chroma_mvs;
- oc_fragment_enc_info *efrag;
- oc_fragment *frag;
- oc_mb *mb;
- oc_mb_enc_info *mbinfo;
- char last_mv[2][2];
- int *uncoded_fragi;
- int *uncoded_fragi_end;
- int best_qii;
- int qii;
- int qi;
- int pli;
- int mbi;
- int fragi;
- int ci;
- int nmbs;
- int mvbitsa;
- int mvbitsb;
- int intra_bits;
- int inter_bits;
- nmbs=_enc->state.nmbs;
- set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
- oc_mode_scheme_chooser_reset(&_enc->mode_scheme_chooser);
- memset(last_mv,0,sizeof(last_mv));
- mbinfo=_enc->mbinfo;
- mvbitsa=mvbitsb=0;
- inter_bits=2+7*_enc->state.nqis-(_enc->state.nqis==3);
- intra_bits=inter_bits+3;
- _enc->state.ncoded_mbis=0;
- for(mbi=0;mbi<nmbs;mbi++){
- mb=_enc->state.mbs+mbi;
- if(mb->mode!=OC_MODE_INVALID){
- oc_fragment_enc_info *efrag;
- char bmvs[2][4][2];
- char mbmv[2];
- int err[OC_NMODES][12];
- int bits[OC_NMODES];
- int coded[13];
- int frag_qii[12][2][2];
- int ncoded;
- int ncoded_luma;
- int mapii;
- int mapi;
- int modei;
- int codedi;
- int mbintrabits;
- int mbpmvbitsa;
- int mbgmvbitsa;
- int mb4mvbitsa;
- int mb4mvbitsb;
- int fti;
- int qti;
- int bi;
- mbinfo=_enc->mbinfo+mbi;
- /*Build up a list of coded fragments.*/
- ncoded=0;
- for(mapii=0;mapii<OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];mapii++){
- mapi=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt][mapii];
- fragi=mb->map[mapi>>2][mapi&3];
- if(fragi>=0&&_enc->state.frags[fragi].coded)coded[ncoded++]=mapi;
- }
- /*If we don't find any, mark this MB not coded and move on.*/
- if(ncoded<=0){
- mb->mode=OC_MODE_NOT_CODED;
- /*Don't bother to do a MV search against the golden frame.
- Just re-use the last vector, which should match well since the
- contents of the MB haven't changed much.*/
- mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[1][OC_FRAME_GOLD][0];
- mbinfo->mvs[0][OC_FRAME_GOLD][1]=mbinfo->mvs[1][OC_FRAME_GOLD][1];
- continue;
- }
- /*Count the number of coded blocks that are luma blocks, and replace the
- block MVs for not-coded blocks with (0,0).*/
- memcpy(bmvs[0],mbinfo->bmvs,sizeof(bmvs[0]));
- /*Mark the end of the list so we don't go past it below.*/
- coded[ncoded]=-1;
- for(mapi=ncoded_luma=0;mapi<4;mapi++){
- if(coded[ncoded_luma]==mapi)ncoded_luma++;
- else bmvs[0][mapi][0]=bmvs[0][mapi][1]=0;
- }
- /*Select a qi value for each coded fragment for each frame type and
- quantizer type.*/
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- efrag=_enc->frinfo+mb->map[mapi>>2][mapi&3];
- for(fti=0;fti<2;fti++)for(qti=0;qti<=fti;qti++){
- best_qii=0;
- for(qii=1;qii<_enc->nqis[fti];qii++){
- if(efrag->qi_min[qti]<=_enc->qis[fti][qii]&&
- (_enc->qis[fti][qii]<_enc->qis[fti][best_qii]||
- _enc->qis[fti][best_qii]<efrag->qi_min[qti])){
- best_qii=qii;
- }
- }
- frag_qii[codedi][fti][qti]=best_qii;
- }
- }
- /*Special case: If no luma blocks are coded, but some chroma blocks are,
- then the macro block defaults to OC_MODE_INTER_NOMV, and no mode need
- be explicitly coded for it.*/
- if(ncoded_luma<=0){
- mb->mode=OC_MODE_NOT_CODED;
- /*Don't bother to do a MV search against the golden frame.*/
- mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[0][OC_FRAME_GOLD][1]=0;
- /*We do collect bitrate stats for frame type decision.*/
- mbintrabits=bits[OC_MODE_INTER_NOMV]=0;
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- pli=mapi>>2;
- fragi=mb->map[pli][mapi&3];
- frag=_enc->state.frags+fragi;
- efrag=_enc->frinfo+fragi;
- /*Set the MB mode and MV in the fragment.*/
- frag->mbmode=OC_MODE_INTER_NOMV;
- frag->mv[0]=frag->mv[1]=0;
- /*Calculate the bitrate estimates.*/
- err[OC_MODE_INTRA][mapi]=0;
- for(ci=1;ci<64;ci++){
- err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
- }
- err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
- OC_FRAME_PREV);
- qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
- mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
- OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
- qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
- bits[OC_MODE_INTER_NOMV]+=OC_RES_BITRATES[qi][pli][
- OC_MODE_INTER_NOMV][OC_MINI(err[OC_MODE_INTER_NOMV][mapi]>>6,15)];
- /*Also mark this fragment with the selected INTER qi.
- It will be reset if we eventually code this as an INTRA frame.*/
-#if defined(OC_BITRATE_STATS)
- efrag->eerror=err[OC_MODE_INTER_NOMV][mapi];
-#endif
- efrag->qii=(unsigned char)frag_qii[codedi][OC_INTER_FRAME][1];
- frag->qi=qi;
- }
- intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
- inter_bits+=bits[OC_MODE_INTER_NOMV]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
- continue;
- }
- /*Otherwise, add this to the coded MB list.*/
- _enc->state.coded_mbis[_enc->state.ncoded_mbis++]=mbi;
- /*Compute the chroma MVs for the 4MV mode.*/
- (*set_chroma_mvs)(bmvs[1],bmvs[0]);
- /*Do a MV search against the golden frame.*/
- oc_mcenc_search_1mv(_enc->mcenc,mb-_enc->state.mbs,OC_FRAME_GOLD);
- /*We are now ready to do mode decision for this macro block.
- Mode decision is done by exhaustively examining all potential choices.
- Since we use a minimum-quality encoding strategy, this amounts to
- simply selecting the mode which uses the smallest number of bits,
- since the minimum quality will be met in any mode.
- Obviously, doing the motion compensation, fDCT, tokenization, and then
- counting the bits each token uses is computationally expensive.
- Theora's EOB runs can also split the cost of these tokens across
- multiple fragments, and naturally we don't know what the optimal
- choice of Huffman codes will be until we know all the tokens we're
- going to encode in all the fragments.
-
- So we use a simple approach to estimating the bit cost of each mode
- based upon the SAD value of the residual.
- The mathematics behind the technique are outlined by Kim \cite{Kim03},
- but the process is very simple.
- For each quality index and SAD value, we have a table containing the
- average number of bits needed to code a fragment.
- The SAD values are placed into a small number of bins (currently 16).
- The bit counts are obtained by examining actual encoded frames, with
- optimal Huffman codes selected and EOB bits appropriately divided
- among all the blocks they involve.
- A separate QIxSAD table is kept for each mode and color plane.
- It may be possible to combine many of these, but only experimentation
- will tell which ones truly represent the same distribution.
-
- @ARTICLE{Kim03,
- author="Hyun Mun Kim",
- title="Adaptive Rate Control Using Nonlinear Regression",
- journal="IEEE Transactions on Circuits and Systems for Video
- Technology",
- volume=13,
- number=5,
- pages="432--439",
- month="May",
- year=2003
- }*/
- memset(bits,0,sizeof(bits));
- mbintrabits=0;
- /*Find the SAD values for each coded fragment for each possible mode.*/
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb->map[pli][bi];
- frag=_enc->state.frags+fragi;
- efrag=_enc->frinfo+fragi;
- err[OC_MODE_INTRA][mapi]=0;
- for(ci=1;ci<64;ci++){
- err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
- }
- err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
- OC_FRAME_PREV);
- err[OC_MODE_INTER_MV][mapi]=oc_enc_frag_sad(_enc,frag,
- mbinfo->mvs[0][OC_FRAME_PREV][0],mbinfo->mvs[0][OC_FRAME_PREV][1],
- pli,OC_FRAME_PREV);
- err[OC_MODE_INTER_MV_LAST][mapi]=oc_enc_frag_sad(_enc,frag,
- last_mv[0][0],last_mv[0][1],pli,OC_FRAME_PREV);
- err[OC_MODE_INTER_MV_LAST2][mapi]=oc_enc_frag_sad(_enc,frag,
- last_mv[1][0],last_mv[1][1],pli,OC_FRAME_PREV);
- err[OC_MODE_INTER_MV_FOUR][mapi]=oc_enc_frag_sad(_enc,frag,
- bmvs[!!pli][bi][0],bmvs[!!pli][bi][1],pli,OC_FRAME_PREV);
- err[OC_MODE_GOLDEN_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,
- 0,0,pli,OC_FRAME_GOLD);
- err[OC_MODE_GOLDEN_MV][mapi]=oc_enc_frag_sad(_enc,frag,
- mbinfo->mvs[0][OC_FRAME_GOLD][0],mbinfo->mvs[0][OC_FRAME_GOLD][1],
- pli,OC_FRAME_GOLD);
- /*Using these distortion values, estimate the number of bits needed to
- code this fragment in each mode.*/
- qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
- mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
- OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
- qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][0]];
- bits[OC_MODE_INTRA]+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
- OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
- qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
- for(modei=OC_MODE_INTRA+1;modei<OC_NMODES;modei++){
- bits[modei]+=OC_RES_BITRATES[qi][pli][modei][
- OC_MINI(err[modei][mapi]>>6,15)];
- }
- }
- /*Bit costs are stored in the table with extra precision.
- Round them down to whole bits here.*/
- for(modei=0;modei<OC_NMODES;modei++){
- bits[modei]=bits[modei]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
- }
- /*Estimate the cost of coding the label for each mode.
- See comments at oc_mode_scheme_chooser_cost() for a description of the
- method.*/
- for(modei=0;modei<OC_NMODES;modei++){
- bits[modei]+=oc_mode_scheme_chooser_cost(&_enc->mode_scheme_chooser,
- modei);
- }
- /*Add the motion vector bits for each mode that requires them.*/
- mbpmvbitsa=oc_mvbitsa(mbinfo->mvs[0][OC_FRAME_PREV][0],
- mbinfo->mvs[0][OC_FRAME_PREV][1]);
- mbgmvbitsa=oc_mvbitsa(mbinfo->mvs[1][OC_FRAME_GOLD][0],
- mbinfo->mvs[0][OC_FRAME_GOLD][1]);
- mb4mvbitsa=mb4mvbitsb=0;
- for(codedi=0;codedi<ncoded_luma;codedi++){
- mb4mvbitsa=oc_mvbitsa(bmvs[0][coded[codedi]][0],
- bmvs[0][coded[codedi]][1]);
- mb4mvbitsb+=12;
- }
- /*We use the same opportunity cost method of estimating the cost of
- coding the motion vectors with the two different schemes as we do for
- estimating the cost of the mode labels.
- However, because there are only two schemes and they're both pretty
- simple, this can just be done inline.*/
- bits[OC_MODE_INTER_MV]+=OC_MINI(mvbitsa+mbpmvbitsa,mvbitsb+12)-
- OC_MINI(mvbitsa,mvbitsb);
- bits[OC_MODE_GOLDEN_MV]+=OC_MINI(mvbitsa+mbgmvbitsa,mvbitsb+12)-
- OC_MINI(mvbitsa,mvbitsb);
- bits[OC_MODE_INTER_MV_FOUR]+=OC_MINI(mvbitsa+mb4mvbitsa,
- mvbitsb+mb4mvbitsb)-OC_MINI(mvbitsa,mvbitsb);
- /*Finally, pick the mode with the cheapest estimated bit cost.*/
- mb->mode=0;
- for(modei=1;modei<OC_NMODES;modei++)if(bits[modei]<bits[mb->mode]){
-        /*Do not select 4MV mode in VP3 compatibility mode when not all the
-          luma blocks are coded.*/
- if(_enc->vp3_compatible&&modei==OC_MODE_INTER_MV_FOUR&&ncoded_luma<4){
- continue;
- }
- mb->mode=modei;
- }
-#if defined(OC_BITRATE_STATS)
- /*Remember the error for the mode we selected in each fragment.*/
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- fragi=mb->map[mapi>>2][mapi&3];
- efrag=_enc->frinfo+fragi;
- efrag->eerror=err[mb->mode][mapi];
- }
-#endif
- /*Go back and store the selected qi index corresponding to the selected
- mode in each fragment.*/
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- fragi=mb->map[mapi>>2][mapi&3];
- frag=_enc->state.frags+fragi;
- efrag=_enc->frinfo+fragi;
- efrag->qii=(unsigned char)
- frag_qii[codedi][OC_INTER_FRAME][mb->mode!=0];
- frag->qi=_enc->qis[OC_INTER_FRAME][efrag->qii];
- }
- inter_bits+=bits[mb->mode];
- intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
- oc_mode_scheme_chooser_update(&_enc->mode_scheme_chooser,mb->mode);
- switch(mb->mode){
- case OC_MODE_INTER_MV:{
- mvbitsa+=mbpmvbitsa;
- mvbitsb+=12;
- last_mv[1][0]=last_mv[0][0];
- last_mv[1][1]=last_mv[0][1];
- mbmv[0]=last_mv[0][0]=mbinfo->mvs[0][OC_FRAME_PREV][0];
- mbmv[1]=last_mv[0][1]=mbinfo->mvs[0][OC_FRAME_PREV][1];
- }break;
- case OC_MODE_INTER_MV_LAST:{
- mbmv[0]=last_mv[0][0];
- mbmv[1]=last_mv[0][1];
- }break;
- case OC_MODE_INTER_MV_LAST2:{
- mbmv[0]=last_mv[1][0];
- mbmv[1]=last_mv[1][1];
- last_mv[1][0]=last_mv[0][0];
- last_mv[1][1]=last_mv[0][1];
- last_mv[0][0]=mbmv[0];
- last_mv[0][1]=mbmv[1];
- }break;
- case OC_MODE_INTER_MV_FOUR:{
- mvbitsa+=mb4mvbitsa;
- mvbitsb+=mb4mvbitsb;
- if(ncoded_luma>0){
- /*After 4MV mode, the last MV is the one from the last coded luma
- block.*/
- last_mv[1][0]=last_mv[0][0];
- last_mv[1][1]=last_mv[0][1];
- last_mv[0][0]=bmvs[0][coded[ncoded_luma-1]][0];
- last_mv[0][1]=bmvs[0][coded[ncoded_luma-1]][1];
- }
- }break;
- case OC_MODE_GOLDEN_MV:{
- mvbitsa+=mbgmvbitsa;
- mvbitsb+=12;
- mbmv[0]=mbinfo->mvs[0][OC_FRAME_GOLD][0];
- mbmv[1]=mbinfo->mvs[0][OC_FRAME_GOLD][1];
- }break;
- }
- if(OC_MODE_HAS_MV[mb->mode]){
- /*Special case 4MV mode.
- MVs are stored in bmvs.*/
- if(mb->mode==OC_MODE_INTER_MV_FOUR){
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb->map[pli][bi];
- frag=_enc->state.frags+fragi;
- frag->mbmode=mb->mode;
- frag->mv[0]=bmvs[!!pli][bi][0];
- frag->mv[1]=bmvs[!!pli][bi][1];
- }
- }
- /*For every other mode with a MV, it is stored in mbmv.*/
- else{
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- fragi=mb->map[mapi>>2][mapi&3];
- frag=_enc->state.frags+fragi;
- frag->mbmode=mb->mode;
- frag->mv[0]=mbmv[0];
- frag->mv[1]=mbmv[1];
- }
- }
- }
- /*For modes with no MV, ensure 0,0 is stored in each fragment.*/
- else{
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- fragi=mb->map[mapi>>2][mapi&3];
- frag=_enc->state.frags+fragi;
- frag->mbmode=mb->mode;
- frag->mv[0]=frag->mv[1]=0;
- }
- }
- }
- }
- /*Finally, compare the cost of an INTER frame and an INTRA frame.*/
- if(mvbitsb<mvbitsa){
- _enc->mv_scheme=1;
- inter_bits+=mvbitsb;
- }
- else{
- _enc->mv_scheme=0;
- inter_bits+=mvbitsa;
- }
- inter_bits+=_enc->mode_scheme_chooser.scheme_bits[
- _enc->mode_scheme_chooser.scheme_list[0]];
- /*The easiest way to count the bits needed for coded/not coded fragments is
- to code them.
- We need to do this anyway, might as well do it now.*/
- oggpackB_reset(&_enc->opb_coded_flags);
- inter_bits+=oc_enc_partial_sb_flags_pack(_enc,&_enc->opb_coded_flags);
- inter_bits+=oc_enc_coded_sb_flags_pack(_enc,&_enc->opb_coded_flags);
- inter_bits+=oc_enc_coded_block_flags_pack(_enc,&_enc->opb_coded_flags);
- /*Select the quantizer list for INTER frames.*/
- _enc->state.nqis=_enc->nqis[OC_INTER_FRAME];
- for(qii=0;qii<_enc->state.nqis;qii++){
- _enc->state.qis[qii]=_enc->qis[OC_INTER_FRAME][qii];
- }
- if(intra_bits>inter_bits){
- _enc->vbr->est_bits=inter_bits;
- return OC_INTER_FRAME;
- }
-  /*Coding everything in INTRA mode is smaller, but we haven't counted up the
-    cost of all the not-coded fragments we will now have to code.*/
- uncoded_fragi_end=uncoded_fragi=_enc->state.uncoded_fragis;
- for(pli=0;pli<3;pli++){
- uncoded_fragi_end-=_enc->state.nuncoded_fragis[pli];
- while(uncoded_fragi-->uncoded_fragi_end){
- fragi=*uncoded_fragi;
- frag=_enc->state.frags+fragi;
- /*Assume a very small bit cost for invalid fragments.*/
- if(frag->invalid)intra_bits+=OC_RES_BITRATES[0][pli][OC_MODE_INTRA][0];
- else{
- int eerror;
- eerror=0;
- efrag=_enc->frinfo+fragi;
- for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
-#if defined(OC_BITRATE_STATS)
- efrag->eerror=eerror;
-#endif
- qi=_enc->qis[OC_INTRA_FRAME][0];
- for(qii=1;qii<_enc->nqis[OC_INTRA_FRAME];qii++){
- if(_enc->qis[OC_INTRA_FRAME][qii]<qi&&
- efrag->qi_min[0]<=_enc->qis[OC_INTRA_FRAME][qii]){
- qi=_enc->qis[OC_INTRA_FRAME][qii];
- }
- }
- intra_bits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
- OC_MINI(eerror>>8,15)];
- /*If it turns out INTRA mode was more expensive, we're done.*/
- if(intra_bits>inter_bits){
- _enc->vbr->est_bits=inter_bits;
- return OC_INTER_FRAME;
- }
- }
- }
- }
- /*So, we've compared the full cost estimates, and INTRA is still better.
- Code an INTRA frame instead.*/
- oc_enc_vbr_mark_all_intra(_enc);
- _enc->vbr->est_bits=intra_bits;
- return OC_INTRA_FRAME;
-}
-
-/*A pipeline stage for transforming, quantizing, and tokenizing the frame.*/
-
-static int oc_vbr_pipe_start(oc_enc_pipe_stage *_stage){
- int pli;
- for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
- return 0;
-}
-
-static int oc_vbr_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
- int pli;
- for(pli=0;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
- return 0;
-}
-
-static int oc_vbr_pipe_end(oc_enc_pipe_stage *_stage){
- oc_enc_ctx *enc;
- int ret;
- enc=_stage->enc;
- if(enc->state.curframe_num==0||
- enc->state.curframe_num-enc->state.keyframe_num>=
- enc->keyframe_frequency_force){
- enc->state.frame_type=OC_INTRA_FRAME;
- oc_enc_vbr_quant_sel_quality(enc,1);
- oc_enc_vbr_mark_all_intra(enc);
- }
- else{
- oc_enc_mark_coded(enc);
- /*Only proceed if we have some coded blocks.
- No coded blocks -> dropped frame -> 0 byte packet.*/
- if(enc->ncoded_frags>0){
- oc_enc_vbr_quant_sel_quality(enc,0);
- enc->state.frame_type=oc_enc_choose_mbmodes(enc);
- if(enc->state.frame_type==OC_INTER_FRAME)oc_enc_do_inter_dcts(enc);
- }
- }
- /*Only initialize subsequent stages after we know how many fragments will be
- encoded, and at what quality (so the loop filter can be set up
- properly).*/
- if(_stage->next!=NULL){
- ret=(*_stage->next->pipe_start)(_stage->next);
- if(ret<0)return ret;
- }
- if(enc->ncoded_frags>0){
- /*TODO: These stages could be pipelined with reconstruction.*/
- oc_enc_vbr_quant_dc(enc);
- oc_enc_vbr_residual_tokenize(enc);
- }
- if(_stage->next!=NULL){
- ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
- if(ret<0)return ret;
- return (*_stage->next->pipe_end)(_stage->next);
- }
- return 0;
-}
-
-/*Initialize the transform, quantization, and tokenization stage of the
- pipeline.
- _enc: The encoding context.*/
-static void oc_vbr_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
- _stage->enc=_enc;
- _stage->next=NULL;
- _stage->pipe_start=oc_vbr_pipe_start;
- _stage->pipe_proc=oc_vbr_pipe_process;
- _stage->pipe_end=oc_vbr_pipe_end;
-}
-
-
-static int oc_enc_vbr_init(oc_enc_vbr_ctx *_vbr,oc_enc_ctx *_enc){
- _vbr->cfg.qi=_enc->state.info.quality;
- _vbr->cfg.kf_qi_min=_vbr->cfg.df_qi_min=0;
- _vbr->cfg.kf_qi_max=_vbr->cfg.df_qi_max=63;
- _vbr->enc=_enc;
- _vbr->impmap=oc_impmap_alloc(_enc);
- _vbr->psych=oc_psych_alloc(_enc);
- oc_vbr_pipe_init(&_vbr->pipe,_enc);
- return 0;
-}
-
-static void oc_enc_vbr_clear(oc_enc_vbr_ctx *_vbr){
- oc_psych_free(_vbr->psych);
- oc_impmap_free(_vbr->impmap);
-}
-
-static int oc_enc_vbr_cfg(oc_enc_vbr_ctx *_vbr,theora_vbr_cfg *_cfg){
- if(_cfg->qi<0||_cfg->qi>63||_cfg->kf_qi_min<0||_cfg->kf_qi_min>63||
- _cfg->kf_qi_max<_cfg->kf_qi_min||_cfg->kf_qi_max>63||
- _cfg->df_qi_min<0||_cfg->df_qi_min>63||
- _cfg->df_qi_max<_cfg->df_qi_min||_cfg->df_qi_max>63){
- return -OC_EINVAL;
- }
- memcpy(&_vbr->cfg,_cfg,sizeof(_vbr->cfg));
- return 0;
-}
-
-static oc_enc_pipe_stage *oc_enc_vbr_create_pipe(oc_enc_vbr_ctx *_vbr){
- oc_enc_pipe_stage *pipe;
- _vbr->enc->fill_pipe.next=&_vbr->enc->pack_pipe;
- _vbr->pipe.next=&_vbr->enc->copy_pipe;
- /*TODO: Disable spatial masking and CSF filtering based on
- application-specified speed level.*/
- pipe=oc_psych_prepend_to_pipe(_vbr->psych,&_vbr->pipe);
- _vbr->enc->fdct_pipe.next=pipe;
- /*TODO: Disable impmap based on application-specified speed level.*/
- pipe=oc_impmap_prepend_to_pipe(_vbr->impmap,&_vbr->enc->fdct_pipe);
- pipe=oc_mcenc_prepend_to_pipe(_vbr->enc->mcenc,pipe);
- return pipe;
-}
-
-
-oc_enc_vbr_ctx *oc_enc_vbr_alloc(oc_enc_ctx *_enc){
- oc_enc_vbr_ctx *vbr;
- vbr=(oc_enc_vbr_ctx *)_ogg_malloc(sizeof(*vbr));
- oc_enc_vbr_init(vbr,_enc);
- return vbr;
-}
-
-void oc_enc_vbr_free(oc_enc_vbr_ctx *_vbr){
- if(_vbr!=NULL){
- oc_enc_vbr_clear(_vbr);
- _ogg_free(_vbr);
- }
-}
-
-int oc_enc_vbr_enable(oc_enc_vbr_ctx *_vbr,theora_vbr_cfg *_cfg){
- if(_cfg!=NULL){
- int ret;
- ret=oc_enc_vbr_cfg(_vbr,_cfg);
- if(ret<0)return ret;
- }
- /*Map the qi to a multiple of JND values.*/
- _vbr->qscale=_vbr->cfg.qi>=63?0.5F:1.5F*OC_POWF(2,0.0625F*(64-_vbr->cfg.qi));
- _vbr->enc->pipe=oc_enc_vbr_create_pipe(_vbr);
- /*TODO: Implement a real speed level.*/
- _vbr->enc->speed_max=0;
- _vbr->enc->set_speed=oc_enc_set_speed_null;
- return 0;
-}
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ogg/ogg.h>
+#include "encvbr.h"
+#include "fdct.h"
+
+
+
+/*Returns the number of bits used by the given motion vector with the VLC
+ motion vector codes (as opposed to the CLC codes, which always use 12 bits).
+ _dx: The X component of the vector, in half-pel units.
+ _dy: The Y component of the vector, in half-pel units.
+ Return: The number of bits required to store the vector with the VLC codes.*/
+static int oc_mvbitsa(int _dx,int _dy){
+ return OC_MV_CODES[0][_dx+31].nbits+OC_MV_CODES[0][_dy+31].nbits;
+}
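
The encoder tracks the running cost of both MV coding schemes and only commits
to one at the end of the frame.  As a minimal sketch of how a single vector's
marginal cost gets charged (a hypothetical helper, not taken from the source;
it assumes the same OC_MINI macro and flat 12-bit CLC cost used elsewhere in
this file):

/*Sketch only: marginal cost of coding one more motion vector, given the
  running totals mvbitsa (VLC scheme) and mvbitsb (CLC scheme, 12 bits per
  vector).
  This mirrors the inline opportunity-cost updates in oc_enc_choose_mbmodes()
  below.*/
static int oc_mv_marginal_cost(int _mvbitsa,int _mvbitsb,int _newbitsa){
  return OC_MINI(_mvbitsa+_newbitsa,_mvbitsb+12)-
   OC_MINI(_mvbitsa,_mvbitsb);
}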
+
+
+
+/*Select the set of quantizers to use for the current frame for each possible
+ frame type (intra or inter).
+ This does not assign a quantizer to each fragment, as that depends on the
+ quantizer type used and thus is done during mode decision.*/
+static void oc_enc_vbr_quant_sel_quality(oc_enc_ctx *_enc,int _intra_only){
+ unsigned qmax[2][3];
+ int qi_min[2];
+ int qi_max[2];
+ int fti;
+ int qti;
+ int pli;
+ int dc_qi[2];
+ qi_min[0]=_enc->vbr->cfg.kf_qi_min;
+ qi_min[1]=_enc->vbr->cfg.df_qi_min;
+ qi_max[0]=_enc->vbr->cfg.kf_qi_max;
+ qi_max[1]=_enc->vbr->cfg.df_qi_max;
+ /*The first quantizer value is used for DC coefficients.
+ Select one that allows us to meet our quality requirements.*/
+ for(qti=0;qti<1+!_intra_only;qti++)for(pli=0;pli<3;pli++){
+ qmax[qti][pli]=OC_MAXI(2U*_enc->vbr->dc_tol_mins[pli],
+ OC_DC_QUANT_MIN[qti]);
+ }
+ /*For intra frames...(containing just INTRA fragments)*/
+ for(dc_qi[0]=qi_min[0];dc_qi[0]<qi_max[0];dc_qi[0]++){
+ if(_enc->state.dequant_tables[0][0][dc_qi[0]][0]<=qmax[0][0]&&
+ _enc->state.dequant_tables[0][1][dc_qi[0]][0]<=qmax[0][1]&&
+ _enc->state.dequant_tables[0][2][dc_qi[0]][0]<=qmax[0][2]){
+ break;
+ }
+ }
+ /*For inter frames...(containing both INTER and INTRA fragments)*/
+ if(!_intra_only){
+ for(dc_qi[1]=OC_CLAMPI(qi_min[1],dc_qi[0],qi_max[1]);dc_qi[1]<qi_max[1];
+ dc_qi[1]++){
+ if(_enc->state.dequant_tables[1][0][dc_qi[1]][0]<=qmax[1][0]&&
+ _enc->state.dequant_tables[1][1][dc_qi[1]][0]<=qmax[1][1]&&
+ _enc->state.dequant_tables[1][2][dc_qi[1]][0]<=qmax[1][2]){
+ break;
+ }
+ }
+ }
+ /*Now we select a full qi list for each frame type.*/
+ for(fti=0;fti<1+!_intra_only;fti++){
+ oc_fragment_enc_info *efrag;
+ int ncoded_fragis;
+ int nqis[64];
+ int qi;
+ int qi0;
+ int qi1;
+ int qi2;
+ /*Here we count up the number of fragments that can use each qi value.
+ Unless we know this is an intra frame, we don't know what quantizer type
+ will be used for each fragment, so we just count both of them.*/
+ memset(nqis,0,sizeof(nqis));
+ if(fti){
+ int *coded_fragi;
+ int *coded_fragi_end;
+ coded_fragi=_enc->state.coded_fragis;
+ ncoded_fragis=_enc->state.ncoded_fragis[0]+
+ _enc->state.ncoded_fragis[1]+_enc->state.ncoded_fragis[2];
+ coded_fragi_end=coded_fragi+ncoded_fragis;
+ for(;coded_fragi<coded_fragi_end;coded_fragi++){
+ efrag=_enc->frinfo+*coded_fragi;
+ for(qti=0;qti<2;qti++)nqis[efrag->qi_min[qti]]++;
+ }
+ }
+ else{
+ oc_fragment_enc_info *efrag_end;
+ ncoded_fragis=_enc->state.nfrags;
+ efrag=_enc->frinfo;
+ efrag_end=efrag+ncoded_fragis;
+ for(;efrag<efrag_end;efrag++)nqis[efrag->qi_min[0]]++;
+ }
+ /*We'll now choose the qi values that divide the fragments into equally
+ sized groups, or as close as we can make it.
+ We account for the DC coefficients by adding an extra amount to the qi
+ value they require.
+ Since there are usually many more DC coefficients coded than any one AC
+ coefficient, we use 1/8 of the number of fragments, instead of 1/64.*/
+ nqis[dc_qi[fti]]+=(ncoded_fragis<<fti)+7>>3;
+ /*Convert this into a moment table.*/
+ for(qi=63;qi-->0;)nqis[qi]+=nqis[qi+1];
+    /*If we have a lower limit on the QI range, promote any fragments with a
+      smaller QI, to ensure they're counted.*/
+ if(qi_min[fti]>0)nqis[qi_min[fti]]=nqis[0];
+ /*Select our first quantizer.*/
+ for(qi0=qi_max[fti]+1;qi0-->qi_min[fti]&&nqis[qi0]<=0;);
+ for(qi1=qi0-1;qi1>=qi_min[fti]&&nqis[qi1]<=nqis[qi0];qi1--);
+ /*Test to make sure there are even two unique quantizers.*/
+ if(qi1>=qi_min[fti]){
+ ogg_int64_t best_metric;
+ ogg_int64_t metric;
+ int best_qi1;
+ int best_qi2;
+ int qii;
+ for(qi2=qi1-1;qi2>=qi_min[fti]&&nqis[qi2]<=nqis[qi1];qi2--);
+ /*Test to make sure there are three unique quantizers.*/
+ if(qi2>=0){
+ best_metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
+ (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
+ best_qi1=qi1;
+ best_qi2=qi2;
+ for(;nqis[qi1]<nqis[1];qi1--){
+ for(qi2=qi1-1;nqis[qi2]<nqis[0];qi2--){
+ metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
+ (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
+ if(metric>=best_metric){
+ best_qi1=qi1;
+ best_qi2=qi2;
+ best_metric=metric;
+ }
+ }
+ }
+ _enc->qis[fti][0]=qi0;
+ _enc->qis[fti][1]=best_qi1;
+ _enc->qis[fti][2]=best_qi2;
+ _enc->nqis[fti]=3;
+ }
+ else{
+ best_metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
+ best_qi1=qi1;
+ if(qi1>0)for(qi1--;nqis[qi1]<nqis[0];qi1--){
+ metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
+ if(metric>best_metric){
+ best_qi1=qi1;
+ best_metric=metric;
+ }
+ }
+ _enc->qis[fti][0]=qi0;
+ _enc->qis[fti][1]=best_qi1;
+ _enc->nqis[fti]=2;
+ }
+ /*Right now qis[0] is the largest.
+ We want to use the smallest that is still large enough for our DC
+ coefficients.*/
+ for(qii=1;qii<_enc->nqis[fti];qii++)if(_enc->qis[fti][qii]>=dc_qi[fti]){
+ qi0=_enc->qis[fti][0];
+ _enc->qis[fti][0]=_enc->qis[fti][qii];
+ _enc->qis[fti][qii]=qi0;
+ }
+ }
+ else{
+ _enc->qis[fti][0]=qi0;
+ _enc->nqis[fti]=1;
+ }
+ /*If we're in VP3 compatibility mode, just use the first quantizer.*/
+ if(_enc->vp3_compatible)_enc->nqis[fti]=1;
+ }
+}
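
The three-quantizer search above scores each candidate split by the product of
the resulting group sizes, which peaks when the groups are as close to equal
as possible.  A hedged sketch of that metric as a standalone helper
(hypothetical name; _nqis is the cumulative count table built in the function
above):

/*Sketch only: score a candidate (qi1,qi2) split of the cumulative fragment
  counts in _nqis; larger is better, with the maximum reached when the three
  groups are closest to equal in size.*/
static ogg_int64_t oc_qi_split_metric(const int _nqis[64],int _qi1,int _qi2){
  return (ogg_int64_t)(_nqis[0]-_nqis[_qi2+1])*
   (_nqis[_qi2+1]-_nqis[_qi1+1])*_nqis[_qi1+1];
}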
+
+/*Mark all fragments as coded and in OC_MODE_INTRA.
+ This also selects a quantizer value for each fragment and builds up the
+ coded fragment list (in coded order) and clears the uncoded fragment list.
+ It does not update the coded macro block list, as that is not used when
+ coding INTRA frames.*/
+static void oc_enc_vbr_mark_all_intra(oc_enc_ctx *_enc){
+ oc_sb *sb;
+ oc_sb *sb_end;
+ int pli;
+ int qii;
+ int ncoded_fragis;
+ int prev_ncoded_fragis;
+ /*Select the quantizer list for INTRA frames.*/
+ _enc->state.nqis=_enc->nqis[OC_INTRA_FRAME];
+ for(qii=0;qii<_enc->state.nqis;qii++){
+ _enc->state.qis[qii]=_enc->qis[OC_INTRA_FRAME][qii];
+ }
+ prev_ncoded_fragis=ncoded_fragis=0;
+ sb=sb_end=_enc->state.sbs;
+ for(pli=0;pli<3;pli++){
+ const oc_fragment_plane *fplane;
+ fplane=_enc->state.fplanes+pli;
+ sb_end+=fplane->nsbs;
+ for(;sb<sb_end;sb++){
+ int quadi;
+ for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
+ int bi;
+ for(bi=0;bi<4;bi++)if(sb->map[quadi][bi]>=0){
+ oc_fragment_enc_info *efrag;
+ oc_fragment *frag;
+ int fragi;
+ int best_qii;
+ fragi=sb->map[quadi][bi];
+ frag=_enc->state.frags+fragi;
+ frag->coded=1;
+ frag->mbmode=OC_MODE_INTRA;
+ efrag=_enc->frinfo+fragi;
+ best_qii=0;
+ for(qii=1;qii<_enc->state.nqis;qii++){
+ if(efrag->qi_min[0]<=_enc->state.qis[qii]&&
+ (_enc->state.qis[best_qii]<efrag->qi_min[0]||
+ _enc->state.qis[qii]<_enc->state.qis[best_qii])){
+ best_qii=qii;
+ }
+ }
+ efrag->qii=(unsigned char)best_qii;
+ frag->qi=_enc->state.qis[best_qii];
+ _enc->state.coded_fragis[ncoded_fragis++]=fragi;
+#if defined(OC_BITRATE_STATS)
+ /*Compute the error function used for intra mode fragments.
+ This function can only use information known at mode decision time, and
+ so excludes the DC component.
+ TODO: Separate this out somewhere more useful.*/
+ {
+ oc_fragment_enc_info *efrag;
+ int ci;
+ int eerror;
+ efrag=_enc->frinfo+fragi;
+ eerror=0;
+ for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
+ efrag->eerror=eerror;
+ }
+#endif
+ }
+ }
+ }
+ _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+ prev_ncoded_fragis=ncoded_fragis;
+ _enc->state.nuncoded_fragis[pli]=0;
+ }
+ _enc->ncoded_frags=ncoded_fragis;
+}
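
The per-fragment quantizer choice above picks the smallest listed qi that
still satisfies the fragment's minimum-quality requirement, falling back to
entry 0 when none qualifies.  The same pattern appears again in
oc_enc_choose_mbmodes(); a sketch of it factored out as a hypothetical helper:

/*Sketch only: index of the smallest quantizer in _qis[0.._nqis) that still
  meets _qi_min, or 0 if no listed quantizer qualifies.*/
static int oc_best_qii(const int *_qis,int _nqis,int _qi_min){
  int best_qii;
  int qii;
  best_qii=0;
  for(qii=1;qii<_nqis;qii++){
    if(_qi_min<=_qis[qii]&&
     (_qis[best_qii]<_qi_min||_qis[qii]<_qis[best_qii])){
      best_qii=qii;
    }
  }
  return best_qii;
}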
+
+
+
+/*Quantize and predict the DC coefficients.
+ This is done in a separate step because the prediction of DC coefficients
+ occurs in image order, not in the Hilbert-curve order, unlike the rest of
+ the encoding process.*/
+static void oc_enc_vbr_quant_dc(oc_enc_ctx *_enc){
+ oc_fragment_enc_info *efrag;
+ oc_fragment *frag;
+ int pli;
+ frag=_enc->state.frags;
+ efrag=_enc->frinfo;
+ for(pli=0;pli<3;pli++){
+ oc_fragment_plane *fplane;
+ unsigned fquant;
+ unsigned iquant;
+ int pred_last[3];
+ int fragx;
+ int fragy;
+ pred_last[OC_FRAME_GOLD]=0;
+ pred_last[OC_FRAME_PREV]=0;
+ pred_last[OC_FRAME_SELF]=0;
+ fplane=_enc->state.fplanes+pli;
+ for(fragy=0;fragy<fplane->nvfrags;fragy++){
+ for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++,efrag++){
+ int qc_pred;
+ int qc;
+ if(!frag->coded)continue;
+ qc_pred=oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
+ /*Fragments outside the displayable region must still be coded in key
+ frames.
+ To minimize wasted bits, just use the predicted DC value.
+ TODO: We might do a better job in the lower-left hand corner by
+ propagating over the DC value of the first actually coded fragment,
+ but for the moment this is not done.*/
+ if(frag->invalid)qc=0;
+ else{
+ int c;
+ int c_abs;
+ int qti;
+ /*We now center the DC coefficient range around the predicted value
+ and perform token bits optimization based on the HVS-determined
+ tolerance range.
+ For more details, see oc_enc_vbr_frag_quant_tokenize().*/
+ qti=frag->mbmode!=OC_MODE_INTRA;
+ iquant=_enc->state.dequant_tables[qti][pli][_enc->state.qis[0]][0];
+ c=efrag->dct_coeffs[0]-qc_pred*iquant;
+ c_abs=abs(c);
+ if(c_abs<=efrag->tols[0])qc=0;
+ else{
+ int qc_signed[2];
+ int qc_max;
+ int qc_min;
+ int qc_offs;
+ int c_sign;
+ int c_min;
+ int c_recon;
+ int cati;
+ fquant=_enc->enquant_tables[qti][pli][_enc->state.qis[0]][0];
+ qc_max=(ogg_int32_t)c_abs*fquant+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
+ c_sign=c<0;
+ c_recon=(qc_max-1)*iquant;
+ c_min=OC_MAXI(0,c_abs-efrag->tols[0]);
+ for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=iquant;
+ if(qc_min<3+OC_NDCT_VAL_CAT2_SIZE)qc=qc_min;
+ else{
+ qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
+ for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
+ cati++){
+ qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
+ }
+ qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
+ }
+ qc_signed[0]=qc;
+ qc_signed[1]=-qc;
+ qc=qc_signed[c_sign];
+ }
+ }
+ pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc=qc+qc_pred;
+ efrag->dct_coeffs[0]=(ogg_int16_t)qc;
+ }
+ }
+ }
+}
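
Both the DC path above and the AC path in oc_enc_vbr_frag_quant_tokenize()
below search for the smallest quantized magnitude whose reconstruction stays
within the HVS tolerance.  A hedged sketch of that search pulled out as a
hypothetical helper (OC_FQUANT_ROUND, OC_FQUANT_SHIFT, OC_MAXI and the
quantizer entries are the same ones used above):

/*Sketch only: given a coefficient magnitude _c_abs, its tolerance _tol, and
  the forward/inverse quantizer entries _fq/_iq, return the smallest quantized
  magnitude whose reconstruction is still within _tol of _c_abs.
  A return value of 0 means the coefficient can be dropped entirely.*/
static int oc_quant_tol_min(int _c_abs,int _tol,unsigned _fq,unsigned _iq){
  int qc_max;
  int qc_min;
  int c_min;
  int c_recon;
  qc_max=(ogg_int32_t)_c_abs*_fq+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
  c_min=OC_MAXI(0,_c_abs-_tol);
  c_recon=(qc_max-1)*_iq;
  for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=_iq;
  return qc_min;
}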
+
+/*Quantize and tokenize the given fragment.
+ _efrag: The encoder information for the fragment to quantize.
+ _qcoeffs: The quantized coefficients, in zig-zag order.
+ _fquant: The forward quantization matrix to use.
+ _iquant: The inverse quantization matrix to use.
+ Return: The number of coefficients before any final zero run.*/
+static int oc_enc_vbr_frag_quant_tokenize(oc_enc_ctx *_enc,
+ oc_fragment_enc_info *_efrag,ogg_int16_t _qcoeffs[64],
+ const ogg_uint16_t _fquant[64],const ogg_uint16_t _iquant[64]){
+ int zzi;
+ int zrun;
+ int qc;
+ int qc_offs;
+ int c_sign;
+ int cati;
+ int tli;
+ /*The DC coefficient is already quantized (it had to be for DC prediction).
+ Here we just tokenize it.*/
+ if(_efrag->dct_coeffs[0]){
+ qc=abs(_efrag->dct_coeffs[0]);
+ c_sign=_efrag->dct_coeffs[0]<0;
+ switch(qc){
+ case 1:{
+ _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+ (unsigned char)(OC_ONE_TOKEN+c_sign);
+ }break;
+ case 2:{
+ _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+ (unsigned char)(OC_TWO_TOKEN+c_sign);
+ }break;
+ default:{
+ if(qc-3<OC_NDCT_VAL_CAT2_SIZE){
+ _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+ (unsigned char)(OC_DCT_VAL_CAT2+qc-3);
+ _enc->extra_bits[0][_enc->nextra_bits[0]++]=(ogg_uint16_t)c_sign;
+ }
+ else{
+ qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
+ for(cati=0;qc>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];cati++){
+ qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
+ }
+ _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+ (unsigned char)(OC_DCT_VAL_CAT3+cati);
+ _enc->extra_bits[0][_enc->nextra_bits[0]++]=
+ (ogg_uint16_t)((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
+ }
+ }
+ }
+ zrun=0;
+ }
+ else zrun=1;
+ /*Now we quantize and tokenize each AC coefficient.*/
+ for(zzi=1;zzi<64;zzi++){
+ int qc_signed[2];
+ int qc_max;
+ int qc_min;
+ int c_sign;
+ int c_abs;
+ int c_min;
+ int c_recon;
+ int ci;
+ ci=OC_FZIG_ZAG[zzi];
+ c_abs=abs(_efrag->dct_coeffs[ci]);
+ /*Best case: we can encode this as a zero.*/
+ if(c_abs<=_efrag->tols[ci]){
+ zrun++;
+ _qcoeffs[zzi]=0;
+ }
+ else{
+ c_sign=_efrag->dct_coeffs[ci]<0;
+ /*qc_max is the most accurate quantized value.
+ This is the largest possible (absolute) value we will use.*/
+ qc_max=(ogg_int32_t)c_abs*_fquant[ci]+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
+ /*qc_min is the smallest possible (by absolute value) quantized value
+ whose dequantized value is within the HVS-determined tolerance
+ range.*/
+ /*TODO: qc_min could be computed by a division (we do not want to allow
+ the rounding errors that are possible with the mul+shift quantization
+ used for qc_max), which would allow qc_max to be calculated only if
+ needed below.
+ Is this faster?
+ Who knows.*/
+ c_recon=(qc_max-1)*_iquant[ci];
+ c_min=c_abs-_efrag->tols[ci];
+ for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=_iquant[ci];
+ /*We now proceed to find a token that is as close to qc_max as possible,
+ but does not use any more bits than would be required for qc_min.
+ The general assumption we make is that encoding a value closer to 0
+ always uses fewer bits.
+ qc_min can still reach 0 here despite the test above, if the quantizer
+ value is larger than the tolerance (which can happen for very small
+ tolerances; the quantizer value has a minimum it cannot go below).*/
+ if(qc_min==0){
+ zrun++;
+ _qcoeffs[zzi]=0;
+ }
+ else{
+ /*If we have an outstanding zero run, code it now.*/
+ if(zrun>0){
+ /*The zero run tokens appear on the list for the first zero in the
+ run.*/
+ tli=zzi-zrun;
+ /*Second assumption: coding a combined run/value token always uses
+ fewer bits than coding them separately.*/
+ /*CAT1 run/value tokens: the value is 1.*/
+ if(qc_min==1&&zrun<=17){
+ if(zrun<=5){
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ (unsigned char)(OC_DCT_RUN_CAT1A+(zrun-1));
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)c_sign;
+ }
+ else if(zrun<=9){
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_RUN_CAT1B;
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)((c_sign<<2)+zrun-6);
+ }
+ else{
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_RUN_CAT1C;
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)((c_sign<<3)+zrun-10);
+ }
+ qc_signed[0]=1;
+ qc_signed[1]=-1;
+ _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+ zrun=0;
+ /*Skip coding the DCT value below.*/
+ continue;
+ }
+ /*CAT2 run/value tokens: the value is 2-3.*/
+ else if(qc_min<=3&&zrun<=3){
+ if(zrun==1){
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_RUN_CAT2A;
+ qc=OC_MINI(3,qc_max);
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)((c_sign<<1)+qc-2);
+ }
+ else{
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_RUN_CAT2B;
+ qc=OC_MINI(3,qc_max);
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)((c_sign<<2)+(qc-2<<1)+zrun-2);
+ }
+ qc_signed[0]=qc;
+ qc_signed[1]=-qc;
+ _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+ zrun=0;
+ /*Skip coding the DCT value below.*/
+ continue;
+ }
+ /*The run is too long or the quantized value too large: code them
+ separately.*/
+ else{
+ /*This is stupid: non-short ZRL tokens are never used for run
+ values less than 9, but codewords are reserved for them,
+ wasting bits.
+ Yes, yes, this would've meant a non-constant number of extra
+ bits for this token, but even so.*/
+ if(zrun<=8){
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_SHORT_ZRL_TOKEN;
+ }
+ else{
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_ZRL_TOKEN;
+ }
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)(zrun-1);
+ zrun=0;
+ }
+ }
+ /*No zero run, or the run and the qc value are being coded
+ separately.*/
+ switch(qc_min){
+ case 1:{
+ _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+ (unsigned char)(OC_ONE_TOKEN+c_sign);
+ _qcoeffs[zzi]=(ogg_int16_t)((-c_sign<<1)+1);
+ }break;
+ case 2:{
+ _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+ (unsigned char)(OC_TWO_TOKEN+c_sign);
+ _qcoeffs[zzi]=(ogg_int16_t)((-c_sign<<2)+2);
+ }break;
+ default:{
+ if(qc_min-3<OC_NDCT_VAL_CAT2_SIZE){
+ _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+ (unsigned char)(OC_DCT_VAL_CAT2+qc_min-3);
+ _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=
+ (ogg_uint16_t)c_sign;
+ qc_signed[0]=qc_min;
+ qc_signed[1]=-qc_min;
+ _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+ }
+ else{
+ qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
+ for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
+ cati++){
+ qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
+ }
+ /*qc_min can be encoded in this category.
+ Since all DCT values in the category use the same number of
+ bits, we encode the closest value to qc_max.
+ This is either qc_max itself, if it is in the category's
+ range, or the largest value in the category.*/
+ qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
+ qc_signed[0]=qc;
+ qc_signed[1]=-qc;
+ _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+ _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+ (unsigned char)(OC_DCT_VAL_CAT3+cati);
+ _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=(ogg_uint16_t)
+ ((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
+ }
+ }
+ }
+ }
+ }
+ }
+ /*If there's a trailing zero run, code an EOB token.*/
+ if(zrun>0){
+ int old_tok;
+ int toki;
+ int ebi;
+ tli=64-zrun;
+ toki=_enc->ndct_tokens[tli]-1;
+ if(toki>=0)old_tok=_enc->dct_tokens[tli][toki];
+ else old_tok=-1;
+ /*Try to extend an EOB run.*/
+ switch(old_tok){
+ case OC_DCT_EOB1_TOKEN:
+ case OC_DCT_EOB2_TOKEN:{
+ _enc->dct_tokens[tli][toki]++;
+ }break;
+ case OC_DCT_EOB3_TOKEN:{
+ _enc->dct_tokens[tli][toki]++;
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=0;
+ }break;
+ case OC_DCT_REPEAT_RUN0_TOKEN:{
+ ebi=_enc->nextra_bits[tli]-1;
+ if(_enc->extra_bits[tli][ebi]<3)_enc->extra_bits[tli][ebi]++;
+ else{
+ _enc->dct_tokens[tli][toki]++;
+ _enc->extra_bits[tli][ebi]=0;
+ }
+ }break;
+ case OC_DCT_REPEAT_RUN1_TOKEN:{
+ ebi=_enc->nextra_bits[tli]-1;
+ if(_enc->extra_bits[tli][ebi]<7)_enc->extra_bits[tli][ebi]++;
+ else{
+ _enc->dct_tokens[tli][toki]++;
+ _enc->extra_bits[tli][ebi]=0;
+ }
+ }break;
+ case OC_DCT_REPEAT_RUN2_TOKEN:{
+ ebi=_enc->nextra_bits[tli]-1;
+ if(_enc->extra_bits[tli][ebi]<15)_enc->extra_bits[tli][ebi]++;
+ else{
+ _enc->dct_tokens[tli][toki]++;
+ /*Again stupid: we could encode runs up to 4127, but inexplicably
+ they don't subtract the bottom of the range here, so we can only
+ go to 4095 (unless we want to change the spec to deal with
+ wrap-around).*/
+ _enc->extra_bits[tli][ebi]=32;
+ }
+ }break;
+ case OC_DCT_REPEAT_RUN3_TOKEN:{
+ ebi=_enc->nextra_bits[tli]-1;
+ if(_enc->extra_bits[tli][ebi]<4095){
+ _enc->extra_bits[tli][ebi]++;
+ break;
+ }
+ /*else fall through.*/
+ }
+ /*Start a new EOB run.*/
+ default:{
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=OC_DCT_EOB1_TOKEN;
+ }
+ }
+ }
+ /*Return the number of coefficients before the final zero run.*/
+ return 64-zrun;
+}
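
The value-category search appears three times above (once for the DC token and
twice for AC values).  As a hedged illustration, the same lookup might be
factored out like this (hypothetical helper; OC_NDCT_VAL_CAT2_SIZE and
OC_DCT_VAL_CAT_SIZES are the tables already used above):

/*Sketch only: for a magnitude _qc_min that is too large for the fixed-value
  tokens, find the index and base offset of the DCT value category that can
  represent it.
  The coded magnitude is then clamped to that category's range, as done
  above.*/
static int oc_dct_val_cat_offs(int _qc_min,int *_cati){
  int qc_offs;
  int cati;
  qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
  for(cati=0;cati<5&&_qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];cati++){
    qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
  }
  *_cati=cati;
  return qc_offs;
}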
+
+static void oc_enc_vbr_residual_tokenize(oc_enc_ctx *_enc){
+ int *coded_fragi;
+ int *coded_fragi_end;
+ int pli;
+ int zzi;
+ /*Clear any existing DCT tokens.*/
+ for(zzi=0;zzi<64;zzi++){
+ _enc->ndct_tokens[zzi]=_enc->nextra_bits[zzi]=0;
+ _enc->extra_bits_offs[zzi]=0;
+ }
+ coded_fragi_end=coded_fragi=_enc->state.coded_fragis;
+ for(pli=0;pli<3;pli++){
+ memcpy(_enc->dct_token_offs[pli],_enc->ndct_tokens,
+ sizeof(_enc->dct_token_offs[pli]));
+ coded_fragi_end+=_enc->state.ncoded_fragis[pli];
+ for(;coded_fragi<coded_fragi_end;coded_fragi++){
+ oc_quant_table *iquants;
+ oc_fragment *frag;
+ oc_fragment_enc_info *efrag;
+ ogg_int16_t qcoeffs[64];
+ int fragi;
+ int qti;
+ int nnzc;
+ fragi=*coded_fragi;
+ frag=_enc->state.frags+fragi;
+ efrag=_enc->frinfo+fragi;
+ qti=frag->mbmode!=OC_MODE_INTRA;
+ iquants=_enc->state.dequant_tables[qti][pli];
+ nnzc=oc_enc_vbr_frag_quant_tokenize(_enc,efrag,qcoeffs,
+ _enc->enquant_tables[qti][pli][frag->qi],iquants[frag->qi]);
+ /*While we're here and things are in cache, reconstruct the quantized
+ fragment.*/
+ oc_state_frag_recon(&_enc->state,frag,pli,qcoeffs,nnzc,nnzc,
+ iquants[_enc->state.qis[0]][0],iquants[frag->qi]);
+ }
+ }
+ /*Merge the final EOB run of one coefficient list with the start of the
+ next, if possible.*/
+ for(zzi=1;zzi<64;zzi++){
+ static const int OC_EOB_RANGE[OC_NDCT_EOB_TOKEN_MAX]={1,1,1,4,8,16,4096};
+ static const int OC_EOB_OFFS[OC_NDCT_EOB_TOKEN_MAX]={1,2,3,4,8,16,0};
+ int old_tok1;
+ int old_tok2;
+ int old_eb1;
+ int old_eb2;
+ int new_tok;
+ int toki;
+ int zzj;
+ int ebi;
+ int runl;
+ /*Make sure this coefficient has tokens at all.*/
+ if(_enc->ndct_tokens[zzi]<=0)continue;
+ /*Ensure the first token is an EOB run.*/
+ old_tok2=_enc->dct_tokens[zzi][0];
+ if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue;
+ /*Search for a previous coefficient that has any tokens at all.*/
+ old_tok1=OC_NDCT_EOB_TOKEN_MAX;
+ zzj=zzi-1;
+ do{
+ toki=_enc->ndct_tokens[zzj]-1;
+ if(toki>=_enc->dct_token_offs[0][zzj]){
+ old_tok1=_enc->dct_tokens[zzj][toki];
+ break;
+ }
+ }
+ while(zzj-->0);
+ /*Ensure its last token was an EOB run.*/
+ if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue;
+ /*Pull off the associated extra bits, if any, and decode the runs.*/
+ ebi=_enc->nextra_bits[zzj];
+ old_eb1=OC_DCT_TOKEN_EXTRA_BITS[old_tok1]?_enc->extra_bits[zzj][--ebi]:0;
+ old_eb2=OC_DCT_TOKEN_EXTRA_BITS[old_tok2]?_enc->extra_bits[zzi][0]:0;
+ runl=OC_EOB_OFFS[old_tok1]+old_eb1+OC_EOB_OFFS[old_tok2]+old_eb2;
+ /*We can't possibly combine these into one run.
+ It might be possible to split them more optimally, but we'll just leave
+ them as is.*/
+ if(runl>=4096)continue;
+ /*We CAN combine them into one run.*/
+ for(new_tok=OC_DCT_EOB1_TOKEN;
+ runl-OC_EOB_OFFS[new_tok]>=OC_EOB_RANGE[new_tok];new_tok++);
+ /*toki is always initialized.
+ If your compiler thinks otherwise, it is dumb.*/
+ _enc->dct_tokens[zzj][toki]=(unsigned char)new_tok;
+ /*Update the two token lists.*/
+ if(OC_DCT_TOKEN_EXTRA_BITS[new_tok]){
+ _enc->extra_bits[zzj][ebi++]=(ogg_uint16_t)(
+ runl-OC_EOB_OFFS[new_tok]);
+ }
+ _enc->nextra_bits[zzj]=ebi;
+ _enc->dct_token_offs[0][zzi]++;
+ /*Note: We don't bother to update the offsets for planes 1 and 2 if
+ planes 0 or 1 don't have any tokens.
+ This turns out not to matter due to the way we use the offsets later.*/
+ if(OC_DCT_TOKEN_EXTRA_BITS[old_tok2])_enc->extra_bits_offs[zzi]++;
+ }
+}
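
The EOB-run merge above maps a combined run length back onto the smallest
token that can hold it, using the OC_EOB_RANGE/OC_EOB_OFFS tables.  A sketch
of that mapping on its own (hypothetical helper, valid for run lengths from 1
to 4095):

/*Sketch only: smallest EOB token able to represent a run of _runl blocks
  (_runl<4096); the leftover _runl-OC_EOB_OFFS[tok] becomes the token's extra
  bits when it has any.*/
static int oc_eob_token(int _runl){
  static const int OC_EOB_RANGE[OC_NDCT_EOB_TOKEN_MAX]={1,1,1,4,8,16,4096};
  static const int OC_EOB_OFFS[OC_NDCT_EOB_TOKEN_MAX]={1,2,3,4,8,16,0};
  int tok;
  for(tok=OC_DCT_EOB1_TOKEN;_runl-OC_EOB_OFFS[tok]>=OC_EOB_RANGE[tok];tok++);
  return tok;
}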
+
+/*Marks each fragment as coded or not, based on the coefficient-level
+ thresholds computed in the psychovisual stage.
+  The MB modes of the fragments are not set, as they will be computed in
+ oc_enc_choose_mbmodes().
+ This also builds up the coded fragment and uncoded fragment lists.
+ The coded MB list is not built up.
+ That is done during mode decision.*/
+static void oc_enc_vbr_mark_coded(oc_enc_ctx *_enc){
+ oc_sb *sb;
+ oc_sb *sb_end;
+ int pli;
+ int bli;
+ int ncoded_fragis;
+ int prev_ncoded_fragis;
+ int nuncoded_fragis;
+ int prev_nuncoded_fragis;
+ _enc->nblock_coded_flags=bli=0;
+ prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0;
+ sb=sb_end=_enc->state.sbs;
+ for(pli=0;pli<3;pli++){
+ const oc_fragment_plane *fplane;
+ int ystride;
+ int prev_refi;
+ fplane=_enc->state.fplanes+pli;
+ sb_end+=fplane->nsbs;
+ prev_refi=_enc->state.ref_frame_idx[OC_FRAME_PREV];
+ ystride=_enc->state.ref_frame_bufs[prev_refi][pli].ystride;
+ for(;sb<sb_end;sb++){
+ int quadi;
+ sb->coded_fully=1;
+ sb->coded_partially=0;
+ for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
+ int bi;
+ for(bi=0;bi<4;bi++){
+ int fragi;
+ fragi=sb->map[quadi][bi];
+ if(fragi>=0){
+ oc_fragment *frag;
+ int flag;
+ frag=_enc->state.frags+fragi;
+ if(frag->invalid){
+ frag->coded=0;
+ *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
+ }
+ else{
+ oc_fragment_enc_info *efrag;
+ ogg_int16_t dct_buf[64];
+ int ci;
+ /*Check to see if the fragment can be skipped.
+ It is assumed that a skipped fragment always takes fewer bits
+ than a coded fragment, though this may not necessarily be true.
+ A single skipped fragment could take up to 34 bits to encode
+                its location in the RLE scheme Theora uses.*/
+ oc_frag_intra_fdct(frag,dct_buf,ystride,prev_refi);
+ efrag=_enc->frinfo+fragi;
+ /*The comparison against OC_DC_QUANT_MIN and OC_AC_QUANT_MIN
+ ensures we mark a fragment as skipped if it would be quantized
+ to all zeros in OC_MODE_INTER_NOMV.
+ These minimum quantizers represent the maximum quality the
+ format is capable of, and can be larger than our tolerances.
+ The minimum for INTER modes is twice the minimum for INTRA
+ modes, so technically if the tolerances are below this
+ threshold, we might be able to do a better job representing
+ this fragment by coding it in INTRA mode.
+ But the number of extra bits required to do that would be
+ ridiculous, so we give up our devotion to minimum quality just
+ this once.
+
+ Note: OC_DC_QUANT_MIN[0] should actually be
+ OC_DC_QUANT_MIN[1]>>1, but in this case those are
+ equivalent.*/
+ ci=0;
+ if((unsigned)abs(dct_buf[0]-efrag->dct_coeffs[0])<=
+ OC_MAXI(efrag->tols[0],OC_DC_QUANT_MIN[0])){
+ for(ci++;ci<64;ci++){
+ if((unsigned)abs(dct_buf[ci]-efrag->dct_coeffs[ci])>
+ OC_MAXI(efrag->tols[ci],OC_AC_QUANT_MIN[0])){
+ break;
+ }
+ }
+ }
+ if(ci>=64){
+ frag->coded=0;
+ *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
+ }
+ else{
+ frag->coded=1;
+ _enc->state.coded_fragis[ncoded_fragis++]=fragi;
+ }
+ }
+ flag=frag->coded;
+ sb->coded_fully&=flag;
+ sb->coded_partially|=flag;
+ _enc->block_coded_flags[bli++]=(char)flag;
+ }
+ }
+ }
+ /*If this is a partially coded super block, keep the entries just added
+        to the coded block flag list.*/
+ if(!sb->coded_fully&&sb->coded_partially){
+ _enc->nblock_coded_flags=bli;
+ }
+ /*Otherwise, discard these entries from the list, as they are
+ implicit.*/
+ else{
+ sb->coded_partially=0;
+ bli=_enc->nblock_coded_flags;
+ }
+ }
+ _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+ prev_ncoded_fragis=ncoded_fragis;
+ _enc->state.nuncoded_fragis[pli]=nuncoded_fragis-prev_nuncoded_fragis;
+ prev_nuncoded_fragis=nuncoded_fragis;
+ }
+ _enc->ncoded_frags=ncoded_fragis;
+}
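
The skip test above compares the DCT of the current block against the DCT of
the co-located block in the previous reference, coefficient by coefficient,
against the larger of the psychovisual tolerance and the format's minimum
quantizer step.  A hedged sketch of that test as a predicate (hypothetical
helper, reusing the same tables and types as the code above):

/*Sketch only: nonzero if the INTER_NOMV residual (_ref_dct is the fDCT of the
  co-located block in the previous reference) is everywhere within the
  fragment's tolerances, so the fragment can be left uncoded.*/
static int oc_frag_skippable(const ogg_int16_t _ref_dct[64],
 const oc_fragment_enc_info *_efrag){
  int ci;
  if((unsigned)abs(_ref_dct[0]-_efrag->dct_coeffs[0])>
   OC_MAXI(_efrag->tols[0],OC_DC_QUANT_MIN[0])){
    return 0;
  }
  for(ci=1;ci<64;ci++){
    if((unsigned)abs(_ref_dct[ci]-_efrag->dct_coeffs[ci])>
     OC_MAXI(_efrag->tols[ci],OC_AC_QUANT_MIN[0])){
      return 0;
    }
  }
  return 1;
}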
+
+/*Selects an appropriate coding mode for each macro block.
+ A mode is chosen for the macro blocks with at least one coded fragment.
+ A bit cost estimate for coding the frame with the selected modes is made,
+ and a similar estimate is made for coding the frame as a key frame.
+ These estimates are used to select the optimal frame type.
+ Return: The frame type to encode with: OC_INTER_FRAME or OC_INTRA_FRAME.*/
+static int oc_enc_choose_mbmodes(oc_enc_ctx *_enc){
+ oc_set_chroma_mvs_func set_chroma_mvs;
+ oc_fragment_enc_info *efrag;
+ oc_fragment *frag;
+ oc_mb *mb;
+ oc_mb_enc_info *mbinfo;
+ char last_mv[2][2];
+ int *uncoded_fragi;
+ int *uncoded_fragi_end;
+ int best_qii;
+ int qii;
+ int qi;
+ int pli;
+ int mbi;
+ int fragi;
+ int ci;
+ int nmbs;
+ int mvbitsa;
+ int mvbitsb;
+ int intra_bits;
+ int inter_bits;
+ nmbs=_enc->state.nmbs;
+ set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
+ oc_mode_scheme_chooser_reset(&_enc->mode_scheme_chooser);
+ memset(last_mv,0,sizeof(last_mv));
+ mbinfo=_enc->mbinfo;
+ mvbitsa=mvbitsb=0;
+ inter_bits=2+7*_enc->state.nqis-(_enc->state.nqis==3);
+ intra_bits=inter_bits+3;
+ _enc->state.ncoded_mbis=0;
+ for(mbi=0;mbi<nmbs;mbi++){
+ mb=_enc->state.mbs+mbi;
+ if(mb->mode!=OC_MODE_INVALID){
+ oc_fragment_enc_info *efrag;
+ char bmvs[2][4][2];
+ char mbmv[2];
+ int err[OC_NMODES][12];
+ int bits[OC_NMODES];
+ int coded[13];
+ int frag_qii[12][2][2];
+ int ncoded;
+ int ncoded_luma;
+ int mapii;
+ int mapi;
+ int modei;
+ int codedi;
+ int mbintrabits;
+ int mbpmvbitsa;
+ int mbgmvbitsa;
+ int mb4mvbitsa;
+ int mb4mvbitsb;
+ int fti;
+ int qti;
+ int bi;
+ mbinfo=_enc->mbinfo+mbi;
+ /*Build up a list of coded fragments.*/
+ ncoded=0;
+ for(mapii=0;mapii<OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];mapii++){
+ mapi=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt][mapii];
+ fragi=mb->map[mapi>>2][mapi&3];
+ if(fragi>=0&&_enc->state.frags[fragi].coded)coded[ncoded++]=mapi;
+ }
+ /*If we don't find any, mark this MB not coded and move on.*/
+ if(ncoded<=0){
+ mb->mode=OC_MODE_NOT_CODED;
+ /*Don't bother to do a MV search against the golden frame.
+ Just re-use the last vector, which should match well since the
+ contents of the MB haven't changed much.*/
+ mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[1][OC_FRAME_GOLD][0];
+ mbinfo->mvs[0][OC_FRAME_GOLD][1]=mbinfo->mvs[1][OC_FRAME_GOLD][1];
+ continue;
+ }
+ /*Count the number of coded blocks that are luma blocks, and replace the
+ block MVs for not-coded blocks with (0,0).*/
+ memcpy(bmvs[0],mbinfo->bmvs,sizeof(bmvs[0]));
+ /*Mark the end of the list so we don't go past it below.*/
+ coded[ncoded]=-1;
+ for(mapi=ncoded_luma=0;mapi<4;mapi++){
+ if(coded[ncoded_luma]==mapi)ncoded_luma++;
+ else bmvs[0][mapi][0]=bmvs[0][mapi][1]=0;
+ }
+ /*Select a qi value for each coded fragment for each frame type and
+ quantizer type.*/
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ efrag=_enc->frinfo+mb->map[mapi>>2][mapi&3];
+ for(fti=0;fti<2;fti++)for(qti=0;qti<=fti;qti++){
+ best_qii=0;
+ for(qii=1;qii<_enc->nqis[fti];qii++){
+ if(efrag->qi_min[qti]<=_enc->qis[fti][qii]&&
+ (_enc->qis[fti][qii]<_enc->qis[fti][best_qii]||
+ _enc->qis[fti][best_qii]<efrag->qi_min[qti])){
+ best_qii=qii;
+ }
+ }
+ frag_qii[codedi][fti][qti]=best_qii;
+ }
+ }
+ /*Special case: If no luma blocks are coded, but some chroma blocks are,
+ then the macro block defaults to OC_MODE_INTER_NOMV, and no mode need
+ be explicitly coded for it.*/
+ if(ncoded_luma<=0){
+ mb->mode=OC_MODE_NOT_CODED;
+ /*Don't bother to do a MV search against the golden frame.*/
+ mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[0][OC_FRAME_GOLD][1]=0;
+ /*We do collect bitrate stats for frame type decision.*/
+ mbintrabits=bits[OC_MODE_INTER_NOMV]=0;
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ pli=mapi>>2;
+ fragi=mb->map[pli][mapi&3];
+ frag=_enc->state.frags+fragi;
+ efrag=_enc->frinfo+fragi;
+ /*Set the MB mode and MV in the fragment.*/
+ frag->mbmode=OC_MODE_INTER_NOMV;
+ frag->mv[0]=frag->mv[1]=0;
+ /*Calculate the bitrate estimates.*/
+ err[OC_MODE_INTRA][mapi]=0;
+ for(ci=1;ci<64;ci++){
+ err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
+ }
+ err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
+ OC_FRAME_PREV);
+ qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
+ mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+ OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
+ qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
+ bits[OC_MODE_INTER_NOMV]+=OC_RES_BITRATES[qi][pli][
+ OC_MODE_INTER_NOMV][OC_MINI(err[OC_MODE_INTER_NOMV][mapi]>>6,15)];
+ /*Also mark this fragment with the selected INTER qi.
+ It will be reset if we eventually code this as an INTRA frame.*/
+#if defined(OC_BITRATE_STATS)
+ efrag->eerror=err[OC_MODE_INTER_NOMV][mapi];
+#endif
+ efrag->qii=(unsigned char)frag_qii[codedi][OC_INTER_FRAME][1];
+ frag->qi=qi;
+ }
+ intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+ inter_bits+=bits[OC_MODE_INTER_NOMV]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+ continue;
+ }
+ /*Otherwise, add this to the coded MB list.*/
+ _enc->state.coded_mbis[_enc->state.ncoded_mbis++]=mbi;
+ /*Compute the chroma MVs for the 4MV mode.*/
+ (*set_chroma_mvs)(bmvs[1],bmvs[0]);
+ /*Do a MV search against the golden frame.*/
+ oc_mcenc_search_1mv(_enc->mcenc,mb-_enc->state.mbs,OC_FRAME_GOLD);
+ /*We are now ready to do mode decision for this macro block.
+ Mode decision is done by exhaustively examining all potential choices.
+ Since we use a minimum-quality encoding strategy, this amounts to
+ simply selecting the mode which uses the smallest number of bits,
+ since the minimum quality will be met in any mode.
+ Obviously, doing the motion compensation, fDCT, tokenization, and then
+ counting the bits each token uses is computationally expensive.
+ Theora's EOB runs can also split the cost of these tokens across
+ multiple fragments, and naturally we don't know what the optimal
+ choice of Huffman codes will be until we know all the tokens we're
+ going to encode in all the fragments.
+
+ So we use a simple approach to estimating the bit cost of each mode
+ based upon the SAD value of the residual.
+ The mathematics behind the technique are outlined by Kim \cite{Kim03},
+ but the process is very simple.
+ For each quality index and SAD value, we have a table containing the
+ average number of bits needed to code a fragment.
+ The SAD values are placed into a small number of bins (currently 16).
+ The bit counts are obtained by examining actual encoded frames, with
+ optimal Huffman codes selected and EOB bits appropriately divided
+ among all the blocks they involve.
+ A separate QIxSAD table is kept for each mode and color plane.
+ It may be possible to combine many of these, but only experimentation
+ will tell which ones truly represent the same distribution.
+
+ @ARTICLE{Kim03,
+ author="Hyun Mun Kim",
+ title="Adaptive Rate Control Using Nonlinear Regression",
+ journal="IEEE Transactions on Circuits and Systems for Video
+ Technology",
+ volume=13,
+ number=5,
+ pages="432--439",
+ month="May",
+ year=2003
+ }*/
+ memset(bits,0,sizeof(bits));
+ mbintrabits=0;
+ /*Find the SAD values for each coded fragment for each possible mode.*/
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ pli=mapi>>2;
+ bi=mapi&3;
+ fragi=mb->map[pli][bi];
+ frag=_enc->state.frags+fragi;
+ efrag=_enc->frinfo+fragi;
+ err[OC_MODE_INTRA][mapi]=0;
+ for(ci=1;ci<64;ci++){
+ err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
+ }
+ err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
+ OC_FRAME_PREV);
+ err[OC_MODE_INTER_MV][mapi]=oc_enc_frag_sad(_enc,frag,
+ mbinfo->mvs[0][OC_FRAME_PREV][0],mbinfo->mvs[0][OC_FRAME_PREV][1],
+ pli,OC_FRAME_PREV);
+ err[OC_MODE_INTER_MV_LAST][mapi]=oc_enc_frag_sad(_enc,frag,
+ last_mv[0][0],last_mv[0][1],pli,OC_FRAME_PREV);
+ err[OC_MODE_INTER_MV_LAST2][mapi]=oc_enc_frag_sad(_enc,frag,
+ last_mv[1][0],last_mv[1][1],pli,OC_FRAME_PREV);
+ err[OC_MODE_INTER_MV_FOUR][mapi]=oc_enc_frag_sad(_enc,frag,
+ bmvs[!!pli][bi][0],bmvs[!!pli][bi][1],pli,OC_FRAME_PREV);
+ err[OC_MODE_GOLDEN_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,
+ 0,0,pli,OC_FRAME_GOLD);
+ err[OC_MODE_GOLDEN_MV][mapi]=oc_enc_frag_sad(_enc,frag,
+ mbinfo->mvs[0][OC_FRAME_GOLD][0],mbinfo->mvs[0][OC_FRAME_GOLD][1],
+ pli,OC_FRAME_GOLD);
+ /*Using these distortion values, estimate the number of bits needed to
+ code this fragment in each mode.*/
+ qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
+ mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+ OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
+ qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][0]];
+ bits[OC_MODE_INTRA]+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+ OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
+ qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
+ for(modei=OC_MODE_INTRA+1;modei<OC_NMODES;modei++){
+ bits[modei]+=OC_RES_BITRATES[qi][pli][modei][
+ OC_MINI(err[modei][mapi]>>6,15)];
+ }
+ }
+ /*Bit costs are stored in the table with extra precision.
+        Round them to the nearest whole bit here.*/
+ for(modei=0;modei<OC_NMODES;modei++){
+ bits[modei]=bits[modei]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+ }
+ /*Estimate the cost of coding the label for each mode.
+ See comments at oc_mode_scheme_chooser_cost() for a description of the
+ method.*/
+ for(modei=0;modei<OC_NMODES;modei++){
+ bits[modei]+=oc_mode_scheme_chooser_cost(&_enc->mode_scheme_chooser,
+ modei);
+ }
+ /*Add the motion vector bits for each mode that requires them.*/
+ mbpmvbitsa=oc_mvbitsa(mbinfo->mvs[0][OC_FRAME_PREV][0],
+ mbinfo->mvs[0][OC_FRAME_PREV][1]);
+      mbgmvbitsa=oc_mvbitsa(mbinfo->mvs[0][OC_FRAME_GOLD][0],
+ mbinfo->mvs[0][OC_FRAME_GOLD][1]);
+ mb4mvbitsa=mb4mvbitsb=0;
+ for(codedi=0;codedi<ncoded_luma;codedi++){
+        mb4mvbitsa+=oc_mvbitsa(bmvs[0][coded[codedi]][0],
+ bmvs[0][coded[codedi]][1]);
+ mb4mvbitsb+=12;
+ }
+ /*We use the same opportunity cost method of estimating the cost of
+ coding the motion vectors with the two different schemes as we do for
+ estimating the cost of the mode labels.
+ However, because there are only two schemes and they're both pretty
+ simple, this can just be done inline.*/
+ bits[OC_MODE_INTER_MV]+=OC_MINI(mvbitsa+mbpmvbitsa,mvbitsb+12)-
+ OC_MINI(mvbitsa,mvbitsb);
+ bits[OC_MODE_GOLDEN_MV]+=OC_MINI(mvbitsa+mbgmvbitsa,mvbitsb+12)-
+ OC_MINI(mvbitsa,mvbitsb);
+ bits[OC_MODE_INTER_MV_FOUR]+=OC_MINI(mvbitsa+mb4mvbitsa,
+ mvbitsb+mb4mvbitsb)-OC_MINI(mvbitsa,mvbitsb);
+ /*Finally, pick the mode with the cheapest estimated bit cost.*/
+ mb->mode=0;
+ for(modei=1;modei<OC_NMODES;modei++)if(bits[modei]<bits[mb->mode]){
+        /*In VP3 compatibility mode, do not select 4MV mode when not all the
+          luma blocks are coded.*/
+ if(_enc->vp3_compatible&&modei==OC_MODE_INTER_MV_FOUR&&ncoded_luma<4){
+ continue;
+ }
+ mb->mode=modei;
+ }
+#if defined(OC_BITRATE_STATS)
+ /*Remember the error for the mode we selected in each fragment.*/
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ fragi=mb->map[mapi>>2][mapi&3];
+ efrag=_enc->frinfo+fragi;
+ efrag->eerror=err[mb->mode][mapi];
+ }
+#endif
+ /*Go back and store the selected qi index corresponding to the selected
+ mode in each fragment.*/
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ fragi=mb->map[mapi>>2][mapi&3];
+ frag=_enc->state.frags+fragi;
+ efrag=_enc->frinfo+fragi;
+ efrag->qii=(unsigned char)
+ frag_qii[codedi][OC_INTER_FRAME][mb->mode!=0];
+ frag->qi=_enc->qis[OC_INTER_FRAME][efrag->qii];
+ }
+ inter_bits+=bits[mb->mode];
+ intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+ oc_mode_scheme_chooser_update(&_enc->mode_scheme_chooser,mb->mode);
+ switch(mb->mode){
+ case OC_MODE_INTER_MV:{
+ mvbitsa+=mbpmvbitsa;
+ mvbitsb+=12;
+ last_mv[1][0]=last_mv[0][0];
+ last_mv[1][1]=last_mv[0][1];
+ mbmv[0]=last_mv[0][0]=mbinfo->mvs[0][OC_FRAME_PREV][0];
+ mbmv[1]=last_mv[0][1]=mbinfo->mvs[0][OC_FRAME_PREV][1];
+ }break;
+ case OC_MODE_INTER_MV_LAST:{
+ mbmv[0]=last_mv[0][0];
+ mbmv[1]=last_mv[0][1];
+ }break;
+ case OC_MODE_INTER_MV_LAST2:{
+ mbmv[0]=last_mv[1][0];
+ mbmv[1]=last_mv[1][1];
+ last_mv[1][0]=last_mv[0][0];
+ last_mv[1][1]=last_mv[0][1];
+ last_mv[0][0]=mbmv[0];
+ last_mv[0][1]=mbmv[1];
+ }break;
+ case OC_MODE_INTER_MV_FOUR:{
+ mvbitsa+=mb4mvbitsa;
+ mvbitsb+=mb4mvbitsb;
+ if(ncoded_luma>0){
+ /*After 4MV mode, the last MV is the one from the last coded luma
+ block.*/
+ last_mv[1][0]=last_mv[0][0];
+ last_mv[1][1]=last_mv[0][1];
+ last_mv[0][0]=bmvs[0][coded[ncoded_luma-1]][0];
+ last_mv[0][1]=bmvs[0][coded[ncoded_luma-1]][1];
+ }
+ }break;
+ case OC_MODE_GOLDEN_MV:{
+ mvbitsa+=mbgmvbitsa;
+ mvbitsb+=12;
+ mbmv[0]=mbinfo->mvs[0][OC_FRAME_GOLD][0];
+ mbmv[1]=mbinfo->mvs[0][OC_FRAME_GOLD][1];
+ }break;
+ }
+ if(OC_MODE_HAS_MV[mb->mode]){
+ /*Special case 4MV mode.
+ MVs are stored in bmvs.*/
+ if(mb->mode==OC_MODE_INTER_MV_FOUR){
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ pli=mapi>>2;
+ bi=mapi&3;
+ fragi=mb->map[pli][bi];
+ frag=_enc->state.frags+fragi;
+ frag->mbmode=mb->mode;
+ frag->mv[0]=bmvs[!!pli][bi][0];
+ frag->mv[1]=bmvs[!!pli][bi][1];
+ }
+ }
+ /*For every other mode with a MV, it is stored in mbmv.*/
+ else{
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ fragi=mb->map[mapi>>2][mapi&3];
+ frag=_enc->state.frags+fragi;
+ frag->mbmode=mb->mode;
+ frag->mv[0]=mbmv[0];
+ frag->mv[1]=mbmv[1];
+ }
+ }
+ }
+ /*For modes with no MV, ensure 0,0 is stored in each fragment.*/
+ else{
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ fragi=mb->map[mapi>>2][mapi&3];
+ frag=_enc->state.frags+fragi;
+ frag->mbmode=mb->mode;
+ frag->mv[0]=frag->mv[1]=0;
+ }
+ }
+ }
+ }
+ /*Finally, compare the cost of an INTER frame and an INTRA frame.*/
+ if(mvbitsb<mvbitsa){
+ _enc->mv_scheme=1;
+ inter_bits+=mvbitsb;
+ }
+ else{
+ _enc->mv_scheme=0;
+ inter_bits+=mvbitsa;
+ }
+ inter_bits+=_enc->mode_scheme_chooser.scheme_bits[
+ _enc->mode_scheme_chooser.scheme_list[0]];
+ /*The easiest way to count the bits needed for coded/not coded fragments is
+ to code them.
+ We need to do this anyway, might as well do it now.*/
+ oggpackB_reset(&_enc->opb_coded_flags);
+ inter_bits+=oc_enc_partial_sb_flags_pack(_enc,&_enc->opb_coded_flags);
+ inter_bits+=oc_enc_coded_sb_flags_pack(_enc,&_enc->opb_coded_flags);
+ inter_bits+=oc_enc_coded_block_flags_pack(_enc,&_enc->opb_coded_flags);
+ /*Select the quantizer list for INTER frames.*/
+ _enc->state.nqis=_enc->nqis[OC_INTER_FRAME];
+ for(qii=0;qii<_enc->state.nqis;qii++){
+ _enc->state.qis[qii]=_enc->qis[OC_INTER_FRAME][qii];
+ }
+ if(intra_bits>inter_bits){
+ _enc->vbr->est_bits=inter_bits;
+ return OC_INTER_FRAME;
+ }
+  /*Coding everything in INTRA mode is smaller, but we haven't counted up the
+    cost of all the not-coded fragments we will now have to code.*/
+ uncoded_fragi_end=uncoded_fragi=_enc->state.uncoded_fragis;
+ for(pli=0;pli<3;pli++){
+ uncoded_fragi_end-=_enc->state.nuncoded_fragis[pli];
+ while(uncoded_fragi-->uncoded_fragi_end){
+ fragi=*uncoded_fragi;
+ frag=_enc->state.frags+fragi;
+ /*Assume a very small bit cost for invalid fragments.*/
+ if(frag->invalid)intra_bits+=OC_RES_BITRATES[0][pli][OC_MODE_INTRA][0];
+ else{
+ int eerror;
+ eerror=0;
+ efrag=_enc->frinfo+fragi;
+ for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
+#if defined(OC_BITRATE_STATS)
+ efrag->eerror=eerror;
+#endif
+ qi=_enc->qis[OC_INTRA_FRAME][0];
+ for(qii=1;qii<_enc->nqis[OC_INTRA_FRAME];qii++){
+ if(_enc->qis[OC_INTRA_FRAME][qii]<qi&&
+ efrag->qi_min[0]<=_enc->qis[OC_INTRA_FRAME][qii]){
+ qi=_enc->qis[OC_INTRA_FRAME][qii];
+ }
+ }
+ intra_bits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+ OC_MINI(eerror>>8,15)];
+ /*If it turns out INTRA mode was more expensive, we're done.*/
+ if(intra_bits>inter_bits){
+ _enc->vbr->est_bits=inter_bits;
+ return OC_INTER_FRAME;
+ }
+ }
+ }
+ }
+ /*So, we've compared the full cost estimates, and INTRA is still better.
+ Code an INTRA frame instead.*/
+ oc_enc_vbr_mark_all_intra(_enc);
+ _enc->vbr->est_bits=intra_bits;
+ return OC_INTRA_FRAME;
+}
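
The SAD-binned table lookup described in the long comment above is used for
every mode and color plane; the only wrinkle is the coarser bin width for
INTRA residuals.  A sketch of the lookup as a hypothetical helper
(OC_RES_BITRATES and OC_BIT_SCALE as used above; the result still carries
OC_BIT_SCALE fractional bits):

/*Sketch only: estimated cost, in 1/(1<<OC_BIT_SCALE) bit units, of coding one
  fragment of plane _pli in mode _modei at quantizer _qi, given the SAD (or,
  for INTRA, the summed absolute AC coefficients) _sad of its residual.*/
static int oc_frag_bits_est(int _qi,int _pli,int _modei,int _sad){
  int shift;
  /*INTRA errors are summed DCT magnitudes and use a coarser bin width.*/
  shift=_modei==OC_MODE_INTRA?8:6;
  return OC_RES_BITRATES[_qi][_pli][_modei][OC_MINI(_sad>>shift,15)];
}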
+
+/*A pipeline stage for transforming, quantizing, and tokenizing the frame.*/
+
+static int oc_vbr_pipe_start(oc_enc_pipe_stage *_stage){
+ int pli;
+ for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+ return 0;
+}
+
+static int oc_vbr_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+ int pli;
+ for(pli=0;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
+ return 0;
+}
+
+static int oc_vbr_pipe_end(oc_enc_pipe_stage *_stage){
+ oc_enc_ctx *enc;
+ int ret;
+ enc=_stage->enc;
+ if(enc->state.curframe_num==0||
+ enc->state.curframe_num-enc->state.keyframe_num>=
+ enc->keyframe_frequency_force){
+ enc->state.frame_type=OC_INTRA_FRAME;
+ oc_enc_vbr_quant_sel_quality(enc,1);
+ oc_enc_vbr_mark_all_intra(enc);
+ }
+ else{
+ oc_enc_vbr_mark_coded(enc);
+ /*Only proceed if we have some coded blocks.
+ No coded blocks -> dropped frame -> 0 byte packet.*/
+ if(enc->ncoded_frags>0){
+ oc_enc_vbr_quant_sel_quality(enc,0);
+ enc->state.frame_type=oc_enc_choose_mbmodes(enc);
+ if(enc->state.frame_type==OC_INTER_FRAME)oc_enc_do_inter_dcts(enc);
+ }
+ }
+ /*Only initialize subsequent stages after we know how many fragments will be
+ encoded, and at what quality (so the loop filter can be set up
+ properly).*/
+ if(_stage->next!=NULL){
+ ret=(*_stage->next->pipe_start)(_stage->next);
+ if(ret<0)return ret;
+ }
+ if(enc->ncoded_frags>0){
+ /*TODO: These stages could be pipelined with reconstruction.*/
+ oc_enc_vbr_quant_dc(enc);
+ oc_enc_vbr_residual_tokenize(enc);
+ }
+ if(_stage->next!=NULL){
+ ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ if(ret<0)return ret;
+ return (*_stage->next->pipe_end)(_stage->next);
+ }
+ return 0;
+}
+
+/*Initialize the transform, quantization, and tokenization stage of the
+ pipeline.
+ _enc: The encoding context.*/
+static void oc_vbr_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+ _stage->enc=_enc;
+ _stage->next=NULL;
+ _stage->pipe_start=oc_vbr_pipe_start;
+ _stage->pipe_proc=oc_vbr_pipe_process;
+ _stage->pipe_end=oc_vbr_pipe_end;
+}
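
Each pipeline stage exposes the same three entry points set up here.  As a
rough usage sketch (hypothetical driver function, not from the source), a
stage is run by starting it, feeding it the available rows, and ending it,
which is the sequence oc_vbr_pipe_end() performs on its successor above:

/*Sketch only: drive a single stage through one whole frame's worth of data.
  The real encoder interleaves pipe_proc() calls as rows become available.*/
static int oc_pipe_stage_run(oc_enc_pipe_stage *_stage,int _y_avail[3]){
  int ret;
  ret=(*_stage->pipe_start)(_stage);
  if(ret<0)return ret;
  ret=(*_stage->pipe_proc)(_stage,_y_avail);
  if(ret<0)return ret;
  return (*_stage->pipe_end)(_stage);
}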
+
+
+static int oc_enc_vbr_init(oc_enc_vbr_ctx *_vbr,oc_enc_ctx *_enc){
+ _vbr->cfg.qi=_enc->state.info.quality;
+ _vbr->cfg.kf_qi_min=_vbr->cfg.df_qi_min=0;
+ _vbr->cfg.kf_qi_max=_vbr->cfg.df_qi_max=63;
+ _vbr->enc=_enc;
+ _vbr->impmap=oc_impmap_alloc(_enc);
+ _vbr->psych=oc_psych_alloc(_enc);
+ oc_vbr_pipe_init(&_vbr->pipe,_enc);
+ return 0;
+}
+
+static void oc_enc_vbr_clear(oc_enc_vbr_ctx *_vbr){
+ oc_psych_free(_vbr->psych);
+ oc_impmap_free(_vbr->impmap);
+}
+
+static int oc_enc_vbr_cfg(oc_enc_vbr_ctx *_vbr,theora_vbr_cfg *_cfg){
+ if(_cfg->qi<0||_cfg->qi>63||_cfg->kf_qi_min<0||_cfg->kf_qi_min>63||
+ _cfg->kf_qi_max<_cfg->kf_qi_min||_cfg->kf_qi_max>63||
+ _cfg->df_qi_min<0||_cfg->df_qi_min>63||
+ _cfg->df_qi_max<_cfg->df_qi_min||_cfg->df_qi_max>63){
+ return OC_EINVAL;
+ }
+ memcpy(&_vbr->cfg,_cfg,sizeof(_vbr->cfg));
+ return 0;
+}
+
+static oc_enc_pipe_stage *oc_enc_vbr_create_pipe(oc_enc_vbr_ctx *_vbr){
+ oc_enc_pipe_stage *pipe;
+ _vbr->enc->fill_pipe.next=&_vbr->enc->pack_pipe;
+ _vbr->pipe.next=&_vbr->enc->copy_pipe;
+ /*TODO: Disable spatial masking and CSF filtering based on
+ application-specified speed level.*/
+ pipe=oc_psych_prepend_to_pipe(_vbr->psych,&_vbr->pipe);
+ _vbr->enc->fdct_pipe.next=pipe;
+ /*TODO: Disable impmap based on application-specified speed level.*/
+ pipe=oc_impmap_prepend_to_pipe(_vbr->impmap,&_vbr->enc->fdct_pipe);
+ pipe=oc_mcenc_prepend_to_pipe(_vbr->enc->mcenc,pipe);
+ return pipe;
+}
+
+
+oc_enc_vbr_ctx *oc_enc_vbr_alloc(oc_enc_ctx *_enc){
+ oc_enc_vbr_ctx *vbr;
+ vbr=(oc_enc_vbr_ctx *)_ogg_malloc(sizeof(*vbr));
+ oc_enc_vbr_init(vbr,_enc);
+ return vbr;
+}
+
+void oc_enc_vbr_free(oc_enc_vbr_ctx *_vbr){
+ if(_vbr!=NULL){
+ oc_enc_vbr_clear(_vbr);
+ _ogg_free(_vbr);
+ }
+}
+
+int oc_enc_vbr_enable(oc_enc_vbr_ctx *_vbr,theora_vbr_cfg *_cfg){
+ if(_cfg!=NULL){
+ int ret;
+ ret=oc_enc_vbr_cfg(_vbr,_cfg);
+ if(ret<0)return ret;
+ }
+ /*Map the qi to a multiple of JND values.*/
+ _vbr->qscale=_vbr->cfg.qi>=63?0.5F:1.5F*OC_POWF(2,0.0625F*(64-_vbr->cfg.qi));
+ _vbr->enc->pipe=oc_enc_vbr_create_pipe(_vbr);
+ /*TODO: Implement a real speed level.*/
+ _vbr->enc->speed_max=0;
+ _vbr->enc->set_speed=oc_enc_set_speed_null;
+ return 0;
+}
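
For reference, the qi-to-qscale mapping in oc_enc_vbr_enable() halves the JND
scale factor every 16 qi steps, running from 24.0 at qi=0 down to roughly 1.6
at qi=62, with 0.5 special-cased at qi=63.  A sketch of it as a standalone
helper (hypothetical name, same OC_POWF macro as above):

/*Sketch only: the JND scale factor used by the VBR quality mapping.*/
static float oc_vbr_qscale(int _qi){
  return _qi>=63?0.5F:1.5F*OC_POWF(2,0.0625F*(64-_qi));
}
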
Property changes on: experimental/derf/theora-exp/lib/encvbr.c
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: experimental/derf/theora-exp/lib/encvbr.h
===================================================================
--- experimental/derf/theora-exp/lib/encvbr.h 2005-09-22 20:21:40 UTC (rev 10043)
+++ experimental/derf/theora-exp/lib/encvbr.h 2005-09-22 20:24:19 UTC (rev 10044)
@@ -1,43 +1,43 @@
-#if !defined(_encvbr_H)
-# define _encvbr_H (1)
-# include "encint.h"
-
-
-
-typedef struct oc_impmap_ctx oc_impmap_ctx;
-typedef struct oc_psych_ctx oc_psych_ctx;
-
-
-
-/*Context information for the VBR encoder.*/
-struct oc_enc_vbr_ctx{
- /*Configuration information.*/
- theora_vbr_cfg cfg;
- /*The main VBR encoder's pipe stage.*/
- oc_enc_pipe_stage pipe;
- /*The scale factor for the current quality setting.*/
- float qscale;
- /*Minimum psychovisual tolerance for the DC coefficients in each plane.*/
- unsigned dc_tol_mins[3];
- /*The estimated bit cost of the current frame.*/
- int est_bits;
- /*The encode context.*/
- oc_enc_ctx *enc;
- /*Context information used to generate the importance map.*/
- oc_impmap_ctx *impmap;
- /*Context information used to generate low-level perceptual weightings.*/
- oc_psych_ctx *psych;
-};
-
-
-oc_impmap_ctx *oc_impmap_alloc(oc_enc_ctx *_enc);
-void oc_impmap_free(oc_impmap_ctx *_impmap);
-oc_enc_pipe_stage *oc_impmap_prepend_to_pipe(oc_impmap_ctx *_impmap,
- oc_enc_pipe_stage *_next);
-
-oc_psych_ctx *oc_psych_alloc(oc_enc_ctx *_enc);
-void oc_psych_free(oc_psych_ctx *_psych);
-oc_enc_pipe_stage *oc_psych_prepend_to_pipe(oc_psych_ctx *_psych,
- oc_enc_pipe_stage *_next);
-
-#endif
+#if !defined(_encvbr_H)
+# define _encvbr_H (1)
+# include "encint.h"
+
+
+
+typedef struct oc_impmap_ctx oc_impmap_ctx;
+typedef struct oc_psych_ctx oc_psych_ctx;
+
+
+
+/*Context information for the VBR encoder.*/
+struct oc_enc_vbr_ctx{
+ /*Configuration information.*/
+ theora_vbr_cfg cfg;
+ /*The main VBR encoder's pipe stage.*/
+ oc_enc_pipe_stage pipe;
+ /*The scale factor for the current quality setting.*/
+ float qscale;
+ /*Minimum psychovisual tolerance for the DC coefficients in each plane.*/
+ unsigned dc_tol_mins[3];
+ /*The estimated bit cost of the current frame.*/
+ int est_bits;
+ /*The encode context.*/
+ oc_enc_ctx *enc;
+ /*Context information used to generate the importance map.*/
+ oc_impmap_ctx *impmap;
+ /*Context information used to generate low-level perceptual weightings.*/
+ oc_psych_ctx *psych;
+};
+
+
+oc_impmap_ctx *oc_impmap_alloc(oc_enc_ctx *_enc);
+void oc_impmap_free(oc_impmap_ctx *_impmap);
+oc_enc_pipe_stage *oc_impmap_prepend_to_pipe(oc_impmap_ctx *_impmap,
+ oc_enc_pipe_stage *_next);
+
+oc_psych_ctx *oc_psych_alloc(oc_enc_ctx *_enc);
+void oc_psych_free(oc_psych_ctx *_psych);
+oc_enc_pipe_stage *oc_psych_prepend_to_pipe(oc_psych_ctx *_psych,
+ oc_enc_pipe_stage *_next);
+
+#endif
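
A hedged sketch of how the rest of the encoder presumably wires this context
in, based only on the functions declared above and the _enc->vbr references in
encvbr.c (the helper name and the OC_FAULT return are assumptions, not part of
the source):

/*Sketch only: allocate the VBR context and enable it with the defaults
  derived from theora_info.quality; passing a non-NULL theora_vbr_cfg would
  override them.*/
static int oc_enc_use_vbr(oc_enc_ctx *_enc){
  _enc->vbr=oc_enc_vbr_alloc(_enc);
  if(_enc->vbr==NULL)return OC_FAULT;
  return oc_enc_vbr_enable(_enc->vbr,NULL);
}
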
Property changes on: experimental/derf/theora-exp/lib/encvbr.h
___________________________________________________________________
Name: svn:eol-style
+ native