[xiph-commits] r16443 - in trunk/theora: . lib lib/x86 lib/x86_vc win32/VS2005/libtheora win32/VS2008/libtheora win32/xmingw32

giles at svn.xiph.org giles at svn.xiph.org
Wed Aug 5 18:43:17 PDT 2009


Author: giles
Date: 2009-08-05 18:43:12 -0700 (Wed, 05 Aug 2009)
New Revision: 16443

Added:
   trunk/theora/lib/analyze.c
   trunk/theora/lib/apiwrapper.c
   trunk/theora/lib/apiwrapper.h
   trunk/theora/lib/bitpack.c
   trunk/theora/lib/bitpack.h
   trunk/theora/lib/dct.h
   trunk/theora/lib/decapiwrapper.c
   trunk/theora/lib/decinfo.c
   trunk/theora/lib/decint.h
   trunk/theora/lib/decode.c
   trunk/theora/lib/dequant.c
   trunk/theora/lib/dequant.h
   trunk/theora/lib/encapiwrapper.c
   trunk/theora/lib/encfrag.c
   trunk/theora/lib/encinfo.c
   trunk/theora/lib/encint.h
   trunk/theora/lib/encode.c
   trunk/theora/lib/encoder_disabled.c
   trunk/theora/lib/enquant.c
   trunk/theora/lib/enquant.h
   trunk/theora/lib/fdct.c
   trunk/theora/lib/fragment.c
   trunk/theora/lib/huffdec.c
   trunk/theora/lib/huffdec.h
   trunk/theora/lib/huffenc.c
   trunk/theora/lib/huffenc.h
   trunk/theora/lib/huffman.h
   trunk/theora/lib/idct.c
   trunk/theora/lib/info.c
   trunk/theora/lib/internal.c
   trunk/theora/lib/mathops.c
   trunk/theora/lib/mathops.h
   trunk/theora/lib/mcenc.c
   trunk/theora/lib/modedec.h
   trunk/theora/lib/ocintrin.h
   trunk/theora/lib/quant.c
   trunk/theora/lib/quant.h
   trunk/theora/lib/rate.c
   trunk/theora/lib/state.c
   trunk/theora/lib/tokenize.c
   trunk/theora/lib/x86/
   trunk/theora/lib/x86/mmxencfrag.c
   trunk/theora/lib/x86/mmxfdct.c
   trunk/theora/lib/x86/sse2fdct.c
   trunk/theora/lib/x86/x86enc.c
   trunk/theora/lib/x86/x86enc.h
   trunk/theora/lib/x86_vc/
   trunk/theora/lib/x86_vc/mmxencfrag.c
   trunk/theora/lib/x86_vc/mmxfdct.c
   trunk/theora/lib/x86_vc/x86enc.c
   trunk/theora/lib/x86_vc/x86enc.h
Removed:
   trunk/theora/lib/dec/
   trunk/theora/lib/enc/
Modified:
   trunk/theora/SConstruct
   trunk/theora/configure.ac
   trunk/theora/lib/Makefile.am
   trunk/theora/lib/internal.h
   trunk/theora/lib/x86/x86int.h
   trunk/theora/lib/x86/x86state.c
   trunk/theora/lib/x86_vc/x86int.h
   trunk/theora/lib/x86_vc/x86state.c
   trunk/theora/win32/VS2005/libtheora/libtheora_dynamic.vcproj
   trunk/theora/win32/VS2005/libtheora/libtheora_static.vcproj
   trunk/theora/win32/VS2008/libtheora/libtheora_dynamic.vcproj
   trunk/theora/win32/VS2008/libtheora/libtheora_static.vcproj
   trunk/theora/win32/xmingw32/Makefile
Log:
Move the encoder and decoder source back into a single directory.

These were split when they were being worked on separately, and to
simplify building when they shared little code. However, for performance
and reuse reasons, the encoder now shares significant code with the
decoder and it doesn't make sense to keep the source separate any more.

I've attempted to update all the build files, but the msvc build is 
untested and the xmingw32 build is only partially tested.

Also, add the prefix fedora 11 uses for the xmingw32 compiler.


Modified: trunk/theora/SConstruct
===================================================================
--- trunk/theora/SConstruct	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/SConstruct	2009-08-06 01:43:12 UTC (rev 16443)
@@ -6,40 +6,40 @@
 def path(prefix, list): return [join(prefix, x) for x in list]
 
 encoder_sources = """
-	dec/apiwrapper.c
-	dec/fragment.c
-	dec/idct.c
-	dec/internal.c
-	dec/state.c
-	dec/quant.c
-	enc/analyze.c
-	enc/encfrag.c
-	enc/encapiwrapper.c
-	enc/encinfo.c
-	enc/encode.c
-	enc/enquant.c
-	enc/fdct.c
-	enc/huffenc.c
-	enc/mathops.c
-	enc/mcenc.c
-	enc/rate.c
-	enc/tokenize.c
+	apiwrapper.c
+	fragment.c
+	idct.c
+	internal.c
+	state.c
+	quant.c
+	analyze.c
+	encfrag.c
+	encapiwrapper.c
+	encinfo.c
+	encode.c
+	enquant.c
+	fdct.c
+	huffenc.c
+	mathops.c
+	mcenc.c
+	rate.c
+	tokenize.c
 """
 
 decoder_sources = """
-        dec/apiwrapper.c
-	dec/bitpack.c
-        dec/decapiwrapper.c
-        dec/decinfo.c
-        dec/decode.c
-        dec/dequant.c
-        dec/fragment.c
-        dec/huffdec.c
-        dec/idct.c
-        dec/info.c
-        dec/internal.c
-        dec/quant.c
-        dec/state.c
+        apiwrapper.c
+	bitpack.c
+        decapiwrapper.c
+        decinfo.c
+        decode.c
+        dequant.c
+        fragment.c
+        huffdec.c
+        idct.c
+        info.c
+        internal.c
+        quant.c
+        state.c
 """
 
 env = Environment()
@@ -129,37 +129,37 @@
 if conf.CheckHost_x86_32():
   env.Append(CPPDEFINES='OC_X86_ASM')
   decoder_sources += """
-        dec/x86/mmxidct.c
-        dec/x86/mmxfrag.c
-        dec/x86/mmxstate.c
-        dec/x86/x86state.c
+        x86/mmxidct.c
+        x86/mmxfrag.c
+        x86/mmxstate.c
+        x86/x86state.c
   """
   encoder_sources += """
-	enc/x86/mmxencfrag.c
-	enc/x86/mmxfdct.c
-	enc/x86/x86enc.c
-	dec/x86/mmxfrag.c
-	dec/x86/mmxidct.c
-	dec/x86/mmxstate.c
-	dec/x86/x86state.c
+	x86/mmxencfrag.c
+	x86/mmxfdct.c
+	x86/x86enc.c
+	x86/mmxfrag.c
+	x86/mmxidct.c
+	x86/mmxstate.c
+	x86/x86state.c
   """
 elif conf.CheckHost_x86_64():
   env.Append(CPPDEFINES=['OC_X86_ASM', 'OC_X86_64_ASM'])
   decoder_sources += """
-        dec/x86/mmxidct.c
-        dec/x86/mmxfrag.c
-        dec/x86/mmxstate.c
-        dec/x86/x86state.c
+        x86/mmxidct.c
+        x86/mmxfrag.c
+        x86/mmxstate.c
+        x86/x86state.c
   """
   encoder_sources += """
-	enc/x86/mmxencfrag.c
-	enc/x86/mmxfdct.c
-	enc/x86/x86enc.c
-	enc/x86/sse2fdct.c
-	dec/x86/mmxfrag.c
-	dec/x86/mmxidct.c
-	dec/x86/mmxstate.c
-	dec/x86/x86state.c
+	x86/mmxencfrag.c
+	x86/mmxfdct.c
+	x86/x86enc.c
+	x86/sse2fdct.c
+	x86/mmxfrag.c
+	x86/mmxidct.c
+	x86/mmxstate.c
+	x86/x86state.c
   """
 
 env = conf.Finish()

Modified: trunk/theora/configure.ac
===================================================================
--- trunk/theora/configure.ac	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/configure.ac	2009-08-06 01:43:12 UTC (rev 16443)
@@ -10,7 +10,7 @@
 AC_CANONICAL_TARGET
 
 AM_CONFIG_HEADER([config.h])
-AC_CONFIG_SRCDIR([lib/enc/fdct.c])
+AC_CONFIG_SRCDIR([lib/fdct.c])
 AM_INIT_AUTOMAKE
 AM_MAINTAINER_MODE
 

Modified: trunk/theora/lib/Makefile.am
===================================================================
--- trunk/theora/lib/Makefile.am	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/lib/Makefile.am	2009-08-06 01:43:12 UTC (rev 16443)
@@ -3,42 +3,42 @@
 
 EXTRA_DIST = \
 	cpu.c \
-	enc/x86/mmxencfrag.c \
-	enc/x86/mmxfdct.c \
-	enc/x86/sse2fdct.c \
-	enc/x86/x86enc.c \
-	enc/x86/x86enc.h \
-	dec/x86/mmxfrag.c \
-	dec/x86/mmxfrag.h \
-	dec/x86/mmxidct.c \
-	dec/x86/mmxloop.h \
-	dec/x86/mmxstate.c \
-	dec/x86/x86int.h \
-	dec/x86/x86state.c \
-	dec/x86_vc
+	x86/mmxencfrag.c \
+	x86/mmxfdct.c \
+	x86/sse2fdct.c \
+	x86/x86enc.c \
+	x86/x86enc.h \
+	x86/mmxfrag.c \
+	x86/mmxfrag.h \
+	x86/mmxidct.c \
+	x86/mmxloop.h \
+	x86/mmxstate.c \
+	x86/x86int.h \
+	x86/x86state.c \
+	x86_vc
 
 lib_LTLIBRARIES = libtheoradec.la libtheoraenc.la libtheora.la
 
 if THEORA_DISABLE_ENCODE
 encoder_uniq_sources = \
-	enc/encoder_disabled.c
+	encoder_disabled.c
 
 encoder_sources = \
 	$(encoder_uniq_sources)
 else
 encoder_uniq_x86_sources = \
-	enc/x86/mmxencfrag.c \
-	enc/x86/mmxfdct.c \
-	enc/x86/x86enc.c
+	x86/mmxencfrag.c \
+	x86/mmxfdct.c \
+	x86/x86enc.c
 
 encoder_uniq_x86_64_sources = \
-	enc/x86/sse2fdct.c
+	x86/sse2fdct.c
 
 encoder_shared_x86_sources = \
-	dec/x86/mmxfrag.c \
-	dec/x86/mmxidct.c \
-	dec/x86/mmxstate.c \
-	dec/x86/x86state.c
+	x86/mmxfrag.c \
+	x86/mmxidct.c \
+	x86/mmxstate.c \
+	x86/x86state.c
 
 encoder_shared_x86_64_sources =
 
@@ -60,37 +60,37 @@
 endif
 
 encoder_uniq_sources = \
-	enc/analyze.c \
-	enc/fdct.c \
-	enc/encfrag.c \
-	enc/encapiwrapper.c \
-	enc/encinfo.c \
-	enc/encode.c \
-	enc/enquant.c \
-	enc/huffenc.c \
-	enc/mathops.c \
-	enc/mcenc.c \
-	enc/rate.c \
-	enc/tokenize.c \
+	analyze.c \
+	fdct.c \
+	encfrag.c \
+	encapiwrapper.c \
+	encinfo.c \
+	encode.c \
+	enquant.c \
+	huffenc.c \
+	mathops.c \
+	mcenc.c \
+	rate.c \
+	tokenize.c \
 	$(encoder_uniq_arch_sources)
 
 encoder_sources = \
-	dec/apiwrapper.c \
-	dec/fragment.c \
-	dec/idct.c \
-	dec/internal.c \
-	dec/state.c \
-	dec/quant.c \
+	apiwrapper.c \
+	fragment.c \
+	idct.c \
+	internal.c \
+	state.c \
+	quant.c \
 	$(encoder_shared_arch_sources) \
 	$(encoder_uniq_sources)
 
 endif
 
 decoder_x86_sources = \
-	dec/x86/mmxidct.c \
-	dec/x86/mmxfrag.c \
-	dec/x86/mmxstate.c \
-	dec/x86/x86state.c
+	x86/mmxidct.c \
+	x86/mmxfrag.c \
+	x86/mmxstate.c \
+	x86/x86state.c
 if CPU_x86_64
 decoder_arch_sources = $(decoder_x86_sources)
 else
@@ -102,42 +102,42 @@
 endif
 
 decoder_sources = \
-	dec/apiwrapper.c \
-	dec/bitpack.c \
-	dec/decapiwrapper.c \
-	dec/decinfo.c \
-	dec/decode.c \
-	dec/dequant.c \
-	dec/fragment.c \
-	dec/huffdec.c \
-	dec/idct.c \
-	dec/info.c \
-	dec/internal.c \
-	dec/quant.c \
-	dec/state.c \
+	apiwrapper.c \
+	bitpack.c \
+	decapiwrapper.c \
+	decinfo.c \
+	decode.c \
+	dequant.c \
+	fragment.c \
+	huffdec.c \
+	idct.c \
+	info.c \
+	internal.c \
+	quant.c \
+	state.c \
 	$(decoder_arch_sources)
 
 noinst_HEADERS = \
 	cpu.h \
 	internal.h \
-	enc/encint.h \
-	enc/enquant.h \
-	enc/huffenc.h \
-	enc/mathops.h \
-	enc/modedec.h \
-	enc/x86/x86enc.h \
-	dec/apiwrapper.h \
-	dec/bitpack.h \
-	dec/dct.h \
-	dec/decint.h \
-	dec/dequant.h \
-	dec/huffdec.h \
-	dec/huffman.h \
-	dec/ocintrin.h \
-	dec/quant.h \
-	dec/x86/mmxfrag.h \
-	dec/x86/mmxloop.h \
-	dec/x86/x86int.h
+	encint.h \
+	enquant.h \
+	huffenc.h \
+	mathops.h \
+	modedec.h \
+	x86/x86enc.h \
+	apiwrapper.h \
+	bitpack.h \
+	dct.h \
+	decint.h \
+	dequant.h \
+	huffdec.h \
+	huffman.h \
+	ocintrin.h \
+	quant.h \
+	x86/mmxfrag.h \
+	x86/mmxloop.h \
+	x86/x86int.h
 
 libtheoradec_la_SOURCES = \
 	$(decoder_sources) \

Copied: trunk/theora/lib/analyze.c (from rev 16442, trunk/theora/lib/enc/analyze.c)
===================================================================
--- trunk/theora/lib/analyze.c	                        (rev 0)
+++ trunk/theora/lib/analyze.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,2680 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function: mode selection code
+  last mod: $Id$
+
+ ********************************************************************/
+#include <limits.h>
+#include <string.h>
+#include "encint.h"
+#include "modedec.h"
+
+
+
+typedef struct oc_fr_state           oc_fr_state;
+typedef struct oc_qii_state          oc_qii_state;
+typedef struct oc_enc_pipeline_state oc_enc_pipeline_state;
+typedef struct oc_rd_metric          oc_rd_metric;
+typedef struct oc_mode_choice        oc_mode_choice;
+
+
+
+/*There are 8 possible schemes used to encode macro block modes.
+  Schemes 0-6 use a maximally-skewed Huffman code to code each of the modes.
+  The same set of Huffman codes is used for each of these 7 schemes, but the
+   mode assigned to each codeword varies.
+  Scheme 0 writes a custom mapping from codeword to MB mode to the bitstream,
+   while schemes 1-6 have a fixed mapping.
+  Scheme 7 just encodes each mode directly in 3 bits.*/
+
+/*The mode orderings for the various mode coding schemes.
+  Row si-1 is the table used for scheme si (1-7); each row is indexed by MB
+   mode and gives the rank of that mode's codeword within the scheme.
+  Scheme 0 uses a custom alphabet, which is not stored in this table.
+  This is the inverse of the equivalent table OC_MODE_ALPHABETS in the
+   decoder.*/
+static const unsigned char OC_MODE_RANKS[7][OC_NMODES]={
+  /*Last MV dominates.*/
+  /*L P M N I G GM 4*/
+  {3,4,2,0,1,5,6,7},
+  /*L P N M I G GM 4*/
+  {2,4,3,0,1,5,6,7},
+  /*L M P N I G GM 4*/
+  {3,4,1,0,2,5,6,7},
+  /*L M N P I G GM 4*/
+  {2,4,1,0,3,5,6,7},
+  /*No MV dominates.*/
+  /*N L P M I G GM 4*/
+  {0,4,3,1,2,5,6,7},
+  /*N G L P M I GM 4*/
+  {0,5,4,2,3,1,6,7},
+  /*Default ordering.*/
+  /*N I M L P G GM 4*/
+  {0,1,2,3,4,5,6,7}
+};
+
+
+
+/*Set up the per-scheme rank tables of the mode scheme chooser.
+  Scheme 0 points at the chooser's own rank array, which is re-ordered as
+   modes are chosen; schemes 1-7 point at the fixed rows of OC_MODE_RANKS.
+  One call per encoder is sufficient.*/
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
+  int scheme;
+  for(scheme=7;scheme>0;scheme--){
+    _chooser->mode_ranks[scheme]=OC_MODE_RANKS[scheme-1];
+  }
+  _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
+}
+
+/*Return the mode scheme chooser to its start-of-frame state.
+  Call this once for every frame, the first one included.*/
+static void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
+  int i;
+  /*No modes have been counted and no scheme has spent any bits...*/
+  memset(_chooser->mode_counts,0,OC_NMODES*sizeof(*_chooser->mode_counts));
+  memset(_chooser->scheme_bits,0,8*sizeof(*_chooser->scheme_bits));
+  /*...except scheme 0, which needs 24 bits up front to store its mode list.*/
+  _chooser->scheme_bits[0]=24;
+  for(i=7;i>=0;i--){
+    /*The mode list and the ranks of scheme 0 begin as the identity mapping.*/
+    _chooser->scheme0_ranks[i]=i;
+    _chooser->scheme0_list[i]=i;
+    /*Scheme 7 is listed first and scheme 0 is listed last.*/
+    _chooser->scheme_list[i]=7-i;
+  }
+}
+
+
+/*This is the real purpose of this data structure: not actually selecting a
+   mode scheme, but estimating the cost of coding a given mode given all the
+   modes selected so far.
+  This is done via opportunity cost: the cost is defined as the number of bits
+   required to encode all the modes selected so far including the current one
+   using the best possible scheme, minus the number of bits required to encode
+   all the modes selected so far not including the current one using the best
+   possible scheme.
+  The computational expense of doing this probably makes it overkill.
+  Just be happy we take a greedy approach instead of trying to solve the
+   global mode-selection problem (which is NP-hard).
+  _chooser: The chooser state; it is only read here, never modified.
+  _mb_mode: The mode to determine the cost of.
+  Return: The number of bits required to code this mode.*/
+static int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+  int scheme0;
+  int scheme1;
+  int best_bits;
+  int mode_bits;
+  int si;
+  int scheme_bits;
+  /*scheme_list is kept sorted by increasing bit count, so these are the
+     current best scheme and the runner-up.*/
+  scheme0=_chooser->scheme_list[0];
+  scheme1=_chooser->scheme_list[1];
+  best_bits=_chooser->scheme_bits[scheme0];
+  /*scheme+1>>3 parses as (scheme+1)>>3: row 1 of OC_MODE_BITS for scheme 7,
+     row 0 for every other scheme.*/
+  mode_bits=OC_MODE_BITS[scheme0+1>>3][_chooser->mode_ranks[scheme0][_mb_mode]];
+  /*Typical case: If the difference between the best scheme and the next best
+     is greater than 6 bits, then adding just one mode cannot change which
+     scheme we use.*/
+  if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits;
+  /*Otherwise, check to see if adding this mode selects a different scheme as
+     the best.*/
+  si=1;
+  /*From here on best_bits is the smallest total found with the mode added.*/
+  best_bits+=mode_bits;
+  do{
+    /*For any scheme except 0, we can just use the bit cost of the mode's rank
+       in that scheme.*/
+    if(scheme1!=0){
+      scheme_bits=_chooser->scheme_bits[scheme1]+
+       OC_MODE_BITS[scheme1+1>>3][_chooser->mode_ranks[scheme1][_mb_mode]];
+    }
+    else{
+      int ri;
+      /*For scheme 0, incrementing the mode count could potentially change the
+         mode's rank.
+        Find the index where the mode would be moved to in the optimal list,
+         and use its bit cost instead of the one for the mode's current
+         position in the list.*/
+      /*We don't recompute scheme bits; this is computing opportunity cost, not
+         an update.*/
+      for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0&&
+       _chooser->mode_counts[_mb_mode]>=
+       _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--);
+      scheme_bits=_chooser->scheme_bits[0]+OC_MODE_BITS[0][ri];
+    }
+    if(scheme_bits<best_bits)best_bits=scheme_bits;
+    if(++si>=8)break;
+    scheme1=_chooser->scheme_list[si];
+  }
+  /*Keep going only while the next scheme in the list is within 6 bits of the
+     current best scheme.*/
+  while(_chooser->scheme_bits[scheme1]-_chooser->scheme_bits[scheme0]<=6);
+  return best_bits-_chooser->scheme_bits[scheme0];
+}
+
+/*Incrementally update the mode counts and per-scheme bit counts and re-order
+   the scheme lists once a mode has been selected.
+  _chooser: The chooser state to update in place.
+  _mb_mode: The mode that was chosen.*/
+static void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+  int ri;
+  int si;
+  _chooser->mode_counts[_mb_mode]++;
+  /*Re-order the scheme0 mode list if necessary.
+    The chosen mode moves ahead of every mode that now has a strictly smaller
+     count; each mode it passes drops one rank.*/
+  for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0;ri--){
+    int pmode;
+    pmode=_chooser->scheme0_list[ri-1];
+    if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mb_mode])break;
+    /*Reorder the mode ranking.*/
+    _chooser->scheme0_ranks[pmode]++;
+    _chooser->scheme0_list[ri]=pmode;
+  }
+  _chooser->scheme0_ranks[_mb_mode]=ri;
+  _chooser->scheme0_list[ri]=_mb_mode;
+  /*Now add the bit cost for the mode to each scheme.
+    si+1>>3 parses as (si+1)>>3, i.e., row 1 of OC_MODE_BITS for scheme 7 and
+     row 0 for all the others.*/
+  for(si=0;si<8;si++){
+    _chooser->scheme_bits[si]+=
+     OC_MODE_BITS[si+1>>3][_chooser->mode_ranks[si][_mb_mode]];
+  }
+  /*Finally, re-order the list of schemes.
+    This is an insertion sort by increasing bit count; schemes with equal bit
+     counts keep their relative order.*/
+  for(si=1;si<8;si++){
+    int sj;
+    int scheme0;
+    int bits0;
+    sj=si;
+    scheme0=_chooser->scheme_list[si];
+    bits0=_chooser->scheme_bits[scheme0];
+    do{
+      int scheme1;
+      scheme1=_chooser->scheme_list[sj-1];
+      if(bits0>=_chooser->scheme_bits[scheme1])break;
+      _chooser->scheme_list[sj]=scheme1;
+    }
+    while(--sj>0);
+    _chooser->scheme_list[sj]=scheme0;
+  }
+}
+
+
+
+/*Look up the bit cost of coding a super block run of a given length.
+  _run_count: The desired run count; must be positive and less than 4130.
+  Return: The OC_SB_RUN_CODE_NBITS entry for the range that contains
+           _run_count.*/
+static int oc_sb_run_bits(int _run_count){
+  int ri;
+  ri=0;
+  /*Stop at the first range whose successor starts above _run_count.*/
+  while(OC_SB_RUN_VAL_MIN[ri+1]<=_run_count)ri++;
+  return OC_SB_RUN_CODE_NBITS[ri];
+}
+
+/*Look up the bit cost of coding a block run of a given length.
+  _run_count: The desired run count; must be positive and less than 30.
+  Return: The OC_BLOCK_RUN_CODE_NBITS entry for that run length.*/
+static int oc_block_run_bits(int _run_count){
+  int ri;
+  /*The table is indexed from zero, but the shortest run has length one.*/
+  ri=_run_count-1;
+  return OC_BLOCK_RUN_CODE_NBITS[ri];
+}
+
+
+
+/*State to track coded block flags and their bit cost.
+  The flag fields hold 0 or 1 once a run has started; oc_fr_state_init() sets
+   them to -1, which never matches a real flag value.*/
+struct oc_fr_state{
+  /*The running bit cost of all the flags accounted for so far.*/
+  ptrdiff_t  bits;
+  /*The length of the current run of sb_partial flags.*/
+  unsigned   sb_partial_count:16;
+  /*The length of the current run of sb_full flags.*/
+  unsigned   sb_full_count:16;
+  /*The value of b_coded_count saved at the end of the last super block.*/
+  unsigned   b_coded_count_prev:8;
+  /*The length of the current run of b_coded flags.*/
+  unsigned   b_coded_count:8;
+  /*The number of blocks added since the last super block boundary.*/
+  unsigned   b_count:8;
+  /*The flag value of the current "partially coded" super block run.*/
+  signed int sb_partial:2;
+  /*The flag value of the current "fully coded" super block run.*/
+  signed int sb_full:2;
+  /*The value of b_coded saved at the end of the last super block.*/
+  signed int b_coded_prev:2;
+  /*The flag value of the current block run.*/
+  signed int b_coded:2;
+};
+
+
+
+/*Put a coded block flag tracker into its initial, empty state.*/
+static void oc_fr_state_init(oc_fr_state *_fr){
+  /*No flag of any kind has been seen yet, so no run is active.*/
+  _fr->b_coded=-1;
+  _fr->b_coded_prev=-1;
+  _fr->sb_full=-1;
+  _fr->sb_partial=-1;
+  /*All of the run lengths, the block count, and the bit total start at 0.*/
+  _fr->b_count=0;
+  _fr->b_coded_count=0;
+  _fr->b_coded_count_prev=0;
+  _fr->sb_full_count=0;
+  _fr->sb_partial_count=0;
+  _fr->bits=0;
+}
+
+
+/*Account for the flags of one more super block (SB).
+  Each flag is run-length coded, so when a run is extended the cost of the
+   old (shorter) run is removed and the cost of the new run is added in its
+   place.
+  A run that has already reached 4129 SBs is left as it is: one extra bit is
+   charged and a new run is started.
+  _fr:         The flag tracker to update.
+  _sb_partial: Whether or not the SB is partially coded.
+  _sb_full:    Whether or not the SB is fully coded; this is ignored for a
+                partially coded SB.*/
+static void oc_fr_state_advance_sb(oc_fr_state *_fr,
+ int _sb_partial,int _sb_full){
+  ptrdiff_t bits;
+  int       sb_partial_count;
+  int       sb_full_count;
+  bits=_fr->bits;
+  /*Extend the sb_partial run, or start a new one.
+    The run length must be loaded from sb_partial_count, not from the
+     sb_partial flag itself, or the run would never grow.*/
+  sb_partial_count=_fr->sb_partial_count;
+  if(_fr->sb_partial==_sb_partial){
+    if(sb_partial_count>=4129){
+      bits++;
+      sb_partial_count=0;
+    }
+    else bits-=oc_sb_run_bits(sb_partial_count);
+  }
+  else sb_partial_count=0;
+  sb_partial_count++;
+  bits+=oc_sb_run_bits(sb_partial_count);
+  if(!_sb_partial){
+    /*Extend the sb_full run, or start a new one.*/
+    sb_full_count=_fr->sb_full_count;
+    if(_fr->sb_full==_sb_full){
+      if(sb_full_count>=4129){
+        bits++;
+        sb_full_count=0;
+      }
+      else bits-=oc_sb_run_bits(sb_full_count);
+    }
+    else sb_full_count=0;
+    sb_full_count++;
+    bits+=oc_sb_run_bits(sb_full_count);
+    _fr->sb_full=_sb_full;
+    _fr->sb_full_count=sb_full_count;
+  }
+  _fr->bits=bits;
+  _fr->sb_partial=_sb_partial;
+  _fr->sb_partial_count=sb_partial_count;
+}
+
+/*Flush any outstanding block flags for a SB (e.g., one with fewer than 16
+   blocks).
+  This does nothing if no blocks have been added since the last SB boundary.*/
+static void oc_fr_state_flush_sb(oc_fr_state *_fr){
+  ptrdiff_t bits;
+  int       sb_partial;
+  /*Self-initialization: presumably only here to silence an "uninitialized
+     variable" warning, as the partially coded path never assigns it.*/
+  int       sb_full=sb_full;
+  int       b_coded_count;
+  int       b_coded;
+  int       b_count;
+  b_count=_fr->b_count;
+  if(b_count>0){
+    bits=_fr->bits;
+    b_coded=_fr->b_coded;
+    b_coded_count=_fr->b_coded_count;
+    if(b_coded_count>=b_count){
+      /*This SB was fully coded/uncoded; roll back the partial block flags.*/
+      bits-=oc_block_run_bits(b_coded_count);
+      /*The part of the run that pre-dates this SB still has to be paid for.*/
+      if(b_coded_count>b_count)bits+=oc_block_run_bits(b_coded_count-b_count);
+      sb_partial=0;
+      sb_full=b_coded;
+      /*Restore the block run state saved at the previous SB boundary.*/
+      b_coded=_fr->b_coded_prev;
+      b_coded_count=_fr->b_coded_count_prev;
+    }
+    else{
+      /*It was partially coded.*/
+      sb_partial=1;
+      /*sb_full is unused.*/
+    }
+    /*Commit the block run state and save it as the new SB-boundary state.*/
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_coded_count_prev=b_coded_count;
+    _fr->b_count=0;
+    _fr->b_coded=b_coded;
+    _fr->b_coded_prev=b_coded;
+    oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
+  }
+}
+
+/*Account for the coded flag of one more block.
+  When this is the 16th block since the last super block (SB) boundary, the
+   SB-level flags are accounted for as well.
+  _fr:      The flag tracker to update.
+  _b_coded: Whether or not the block is coded.*/
+static void oc_fr_state_advance_block(oc_fr_state *_fr,int _b_coded){
+  ptrdiff_t bits;
+  int       b_coded_count;
+  int       b_count;
+  int       sb_partial;
+  /*Self-initialization: presumably only here to silence an "uninitialized
+     variable" warning, as the partially coded path never assigns it.*/
+  int       sb_full=sb_full;
+  bits=_fr->bits;
+  /*Extend the b_coded run, or start a new one.*/
+  b_coded_count=_fr->b_coded_count;
+  if(_fr->b_coded==_b_coded)bits-=oc_block_run_bits(b_coded_count);
+  else b_coded_count=0;
+  b_coded_count++;
+  b_count=_fr->b_count+1;
+  if(b_count>=16){
+    /*We finished a superblock.*/
+    if(b_coded_count>=16){
+      /*It was fully coded/uncoded; roll back the partial block flags.*/
+      /*The cost of the extended run was never added; only the part of the
+         run that pre-dates this SB has to be paid for again.*/
+      if(b_coded_count>16)bits+=oc_block_run_bits(b_coded_count-16);
+      sb_partial=0;
+      sb_full=_b_coded;
+      /*Restore the block run state saved at the previous SB boundary.*/
+      _b_coded=_fr->b_coded_prev;
+      b_coded_count=_fr->b_coded_count_prev;
+    }
+    else{
+      bits+=oc_block_run_bits(b_coded_count);
+      /*It was partially coded.*/
+      sb_partial=1;
+      /*sb_full is unused.*/
+    }
+    /*Commit the block run state and save it as the new SB-boundary state.*/
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_coded_count_prev=b_coded_count;
+    _fr->b_count=0;
+    _fr->b_coded=_b_coded;
+    _fr->b_coded_prev=_b_coded;
+    oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
+  }
+  else{
+    /*Still inside the SB: just charge for the extended (or new) run.*/
+    bits+=oc_block_run_bits(b_coded_count);
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_count=b_count;
+    _fr->b_coded=_b_coded;
+  }
+}
+
+/*Account for one more block whose coded flag is 0 (the block is skipped).*/
+static void oc_fr_skip_block(oc_fr_state *_fr){
+  oc_fr_state_advance_block(_fr,0);
+}
+
+/*Account for one more block whose coded flag is 1 (the block is coded).*/
+static void oc_fr_code_block(oc_fr_state *_fr){
+  oc_fr_state_advance_block(_fr,1);
+}
+
+/*Estimate the flag cost of coding, rather than skipping, the next block.
+  _fr: The current flag tracker state; it is not modified.
+  Return: The flag bits used if the next block is coded, minus the flag bits
+           used if it is skipped.*/
+static int oc_fr_cost1(const oc_fr_state *_fr){
+  oc_fr_state skipped;
+  oc_fr_state coded;
+  /*Try both outcomes on private copies of the state.*/
+  *&skipped=*_fr;
+  oc_fr_skip_block(&skipped);
+  *&coded=*_fr;
+  oc_fr_code_block(&coded);
+  return (int)(coded.bits-skipped.bits);
+}
+
+/*Estimate the flag cost of four blocks relative to skipping all four.
+  _pre:  The flag tracker state before the four blocks; it is not modified.
+  _post: The flag tracker state after the four blocks were added.
+  Return: The bit total of _post, minus the bit total obtained by skipping
+           four blocks starting from _pre.*/
+static int oc_fr_cost4(const oc_fr_state *_pre,const oc_fr_state *_post){
+  oc_fr_state skipped;
+  int         bi;
+  *&skipped=*_pre;
+  for(bi=0;bi<4;bi++)oc_fr_skip_block(&skipped);
+  return (int)(_post->bits-skipped.bits);
+}
+
+
+
+/*State to track the run-length coded quantizer index (qii) flags and their
+   bit cost.
+  Each flag is 0 or 1 once a run has started; oc_qii_state_init() sets it to
+   -1.*/
+struct oc_qii_state{
+  /*The running bit cost of all the flags accounted for so far.*/
+  ptrdiff_t  bits;
+  /*The length of the current run of qi01 flags.*/
+  unsigned   qi01_count:14;
+  /*The current first-level flag: 0 for qii 0, 1 for qii 1 or 2.*/
+  signed int qi01:2;
+  /*The length of the current run of qi12 flags.*/
+  unsigned   qi12_count:14;
+  /*The current second-level flag: 0 for qii 1, 1 for qii 2.
+    It is only updated for non-zero qii values.*/
+  signed int qi12:2;
+};
+
+
+
+/*Put a quantizer index flag tracker into its initial, empty state.*/
+static void oc_qii_state_init(oc_qii_state *_qs){
+  /*Neither flag has been seen yet, so no run is active.*/
+  _qs->qi12=-1;
+  _qs->qi01=-1;
+  /*Both run lengths and the bit total start at 0.*/
+  _qs->qi12_count=0;
+  _qs->qi01_count=0;
+  _qs->bits=0;
+}
+
+
+/*Compute the flag tracker state that results from adding one more quantizer
+   index choice to an existing state.
+  All reads of _qs happen before any write to _qd.
+  _qd:  Returns the updated state.
+  _qs:  The state to start from.
+  _qii: The quantizer index choice to add (0, 1, or 2, judging by how the two
+         flags are derived from it below).*/
+static void oc_qii_state_advance(oc_qii_state *_qd,
+ const oc_qii_state *_qs,int _qii){
+  ptrdiff_t bits;
+  int       qi01;
+  int       qi01_count;
+  int       qi12;
+  int       qi12_count;
+  bits=_qs->bits;
+  /*_qii+1>>1 parses as (_qii+1)>>1: 0 for qii 0, 1 for qii 1 or 2.*/
+  qi01=_qii+1>>1;
+  /*Extend the qi01 run, or start a new one.
+    A run that has already reached 4129 entries is left as it is: one extra
+     bit is charged and a new run is started.*/
+  qi01_count=_qs->qi01_count;
+  if(qi01==_qs->qi01){
+    if(qi01_count>=4129){
+      bits++;
+      qi01_count=0;
+    }
+    else bits-=oc_sb_run_bits(qi01_count);
+  }
+  else qi01_count=0;
+  qi01_count++;
+  bits+=oc_sb_run_bits(qi01_count);
+  qi12_count=_qs->qi12_count;
+  /*The second-level flag is only coded for non-zero qii values.*/
+  if(_qii){
+    qi12=_qii>>1;
+    /*Extend the qi12 run, or start a new one, exactly as above.*/
+    if(qi12==_qs->qi12){
+      if(qi12_count>=4129){
+        bits++;
+        qi12_count=0;
+      }
+      else bits-=oc_sb_run_bits(qi12_count);
+    }
+    else qi12_count=0;
+    qi12_count++;
+    bits+=oc_sb_run_bits(qi12_count);
+  }
+  /*Otherwise the qi12 run is carried over unchanged.*/
+  else qi12=_qs->qi12;
+  _qd->bits=bits;
+  _qd->qi01=qi01;
+  _qd->qi01_count=qi01_count;
+  _qd->qi12=qi12;
+  _qd->qi12_count=qi12_count;
+}
+
+
+
+/*Temporary encoder state for the analysis pipeline.
+  Arrays whose first dimension is 3 are indexed by color plane.*/
+struct oc_enc_pipeline_state{
+  /*The loop filter bounding values, filled in by
+     oc_state_loop_filter_init().*/
+  int                 bounding_values[256];
+  /*Per-plane coded block flag trackers, used for bit estimation only.*/
+  oc_fr_state         fr[3];
+  /*Per-plane quantizer index flag trackers.*/
+  oc_qii_state        qs[3];
+  /*Condensed dequantization tables, indexed by [pli][qii][qti].*/
+  const ogg_uint16_t *dequant[3][3][2];
+  /*Condensed quantization tables, indexed by [pli][qii][qti].*/
+  const oc_iquant    *enquant[3][3][2];
+  /*Skip SSD storage for the current MCU in each plane.*/
+  unsigned           *skip_ssd[3];
+  /*Coded/uncoded fragment lists for each plane for the current MCU.*/
+  ptrdiff_t          *coded_fragis[3];
+  ptrdiff_t          *uncoded_fragis[3];
+  /*The number of entries in each of the above lists.*/
+  ptrdiff_t           ncoded_fragis[3];
+  ptrdiff_t           nuncoded_fragis[3];
+  /*The starting fragment for the current MCU in each plane.*/
+  ptrdiff_t           froffset[3];
+  /*The starting row for the current MCU in each plane.*/
+  int                 fragy0[3];
+  /*The ending row for the current MCU in each plane.*/
+  int                 fragy_end[3];
+  /*The starting superblock for the current MCU in each plane.*/
+  unsigned            sbi0[3];
+  /*The ending superblock for the current MCU in each plane.*/
+  unsigned            sbi_end[3];
+  /*The number of tokens for zzi=1 for each color plane.*/
+  int                 ndct_tokens1[3];
+  /*The outstanding eob_run count for zzi=1 for each color plane.*/
+  int                 eob_run1[3];
+  /*Whether or not the loop filter is enabled.*/
+  int                 loop_filter;
+};
+
+
+/*Initialize the analysis pipeline state from the current encoder state.
+  This sets up the flag trackers, the skip SSD and fragment list pointers, the
+   condensed quantizer tables, the tokenization state, and the loop filter.
+  _enc:  The encoder context to read the configuration from.
+  _pipe: The pipeline state to initialize.*/
+static void oc_enc_pipeline_init(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe){
+  ptrdiff_t *coded_fragis;
+  unsigned   mcu_nvsbs;
+  ptrdiff_t  mcu_nfrags;
+  int        hdec;
+  int        vdec;
+  int        pli;
+  int        qii;
+  int        qti;
+  /*Initialize the per-plane coded block flag trackers.
+    These are used for bit-estimation purposes only; the real flag bits span
+     all three planes, so we can't compute them in parallel.*/
+  for(pli=0;pli<3;pli++)oc_fr_state_init(_pipe->fr+pli);
+  for(pli=0;pli<3;pli++)oc_qii_state_init(_pipe->qs+pli);
+  /*Set up the per-plane skip SSD storage pointers.*/
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  /*Each super block holds 16 fragments.*/
+  mcu_nfrags=mcu_nvsbs*_enc->state.fplanes[0].nhsbs*16;
+  /*Bits 0 and 1 of pixel_fmt are clear when the chroma planes are decimated
+     horizontally and vertically, respectively (presumably; confirm against
+     the pixel format definitions).*/
+  hdec=!(_enc->state.info.pixel_fmt&1);
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  _pipe->skip_ssd[0]=_enc->mcu_skip_ssd;
+  _pipe->skip_ssd[1]=_pipe->skip_ssd[0]+mcu_nfrags;
+  /*mcu_nfrags>>hdec+vdec parses as mcu_nfrags>>(hdec+vdec).*/
+  _pipe->skip_ssd[2]=_pipe->skip_ssd[1]+(mcu_nfrags>>hdec+vdec);
+  /*Set up per-plane pointers to the coded and uncoded fragments lists.
+    Unlike the decoder, each planes' coded and uncoded fragment list is kept
+     separate during the analysis stage; we only make the coded list for all
+     three planes contiguous right before the final packet is output
+     (destroying the uncoded lists, which are no longer needed).*/
+  coded_fragis=_enc->state.coded_fragis;
+  for(pli=0;pli<3;pli++){
+    /*The coded list starts at the beginning of the plane's region, and the
+       uncoded list pointer starts at its end.*/
+    _pipe->coded_fragis[pli]=coded_fragis;
+    coded_fragis+=_enc->state.fplanes[pli].nfrags;
+    _pipe->uncoded_fragis[pli]=coded_fragis;
+  }
+  memset(_pipe->ncoded_fragis,0,sizeof(_pipe->ncoded_fragis));
+  memset(_pipe->nuncoded_fragis,0,sizeof(_pipe->nuncoded_fragis));
+  /*Set up condensed quantizer tables.
+    Only the qi values actually in use for this frame are copied.*/
+  for(pli=0;pli<3;pli++){
+    for(qii=0;qii<_enc->state.nqis;qii++){
+      int qi;
+      qi=_enc->state.qis[qii];
+      for(qti=0;qti<2;qti++){
+        _pipe->dequant[pli][qii][qti]=_enc->state.dequant_tables[qi][pli][qti];
+        _pipe->enquant[pli][qii][qti]=_enc->enquant_tables[qi][pli][qti];
+      }
+    }
+  }
+  /*Initialize the tokenization state.*/
+  for(pli=0;pli<3;pli++){
+    _pipe->ndct_tokens1[pli]=0;
+    _pipe->eob_run1[pli]=0;
+  }
+  /*Initialize the bounding value array for the loop filter.
+    The filter is enabled when oc_state_loop_filter_init() returns zero.*/
+  _pipe->loop_filter=!oc_state_loop_filter_init(&_enc->state,
+   _pipe->bounding_values);
+}
+
+/*Sets the current MCU stripe to super block row _sby.
+  _enc:  The encoder context.
+  _pipe: The pipeline state whose per-plane stripe bounds are updated.
+  _sby:  The first super block row of the stripe, in luma plane units.
+  Return: A non-zero value if more stripes remain after this one (i.e., this
+           was NOT the last MCU), or zero if this was the last MCU.*/
+static int oc_enc_pipeline_set_stripe(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _sby){
+  const oc_fragment_plane *fplane;
+  unsigned                 mcu_nvsbs;
+  int                      sby_end;
+  int                      notdone;
+  int                      vdec;
+  int                      pli;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  sby_end=_enc->state.fplanes[0].nvsbs;
+  /*The stripe is clipped to the bottom of the frame when it is the last.*/
+  notdone=_sby+mcu_nvsbs<sby_end;
+  if(notdone)sby_end=_sby+mcu_nvsbs;
+  /*Plane 0 is never decimated; vdec is updated for the other two planes at
+     the bottom of the loop.*/
+  vdec=0;
+  for(pli=0;pli<3;pli++){
+    fplane=_enc->state.fplanes+pli;
+    _pipe->sbi0[pli]=fplane->sboffset+(_sby>>vdec)*fplane->nhsbs;
+    /*_sby<<2-vdec parses as _sby<<(2-vdec).*/
+    _pipe->fragy0[pli]=_sby<<2-vdec;
+    _pipe->froffset[pli]=fplane->froffset
+     +_pipe->fragy0[pli]*(ptrdiff_t)fplane->nhfrags;
+    if(notdone){
+      _pipe->sbi_end[pli]=fplane->sboffset+(sby_end>>vdec)*fplane->nhsbs;
+      _pipe->fragy_end[pli]=sby_end<<2-vdec;
+    }
+    else{
+      /*The last stripe runs to the end of the plane.*/
+      _pipe->sbi_end[pli]=fplane->sboffset+fplane->nsbs;
+      _pipe->fragy_end[pli]=fplane->nvfrags;
+    }
+    vdec=!(_enc->state.info.pixel_fmt&2);
+  }
+  return notdone;
+}
+
+/*Finish processing the current MCU stripe for a single color plane.
+  This copies the uncoded fragments, performs DC prediction and DC
+   tokenization, advances the fragment lists, and then runs the loop filter
+   (if enabled) and the border fill on the rows that are ready.
+  _enc:    The encoder context.
+  _pipe:   The pipeline state for the current stripe.
+  _pli:    The color plane to finish.
+  _sdelay: The number of fragment rows to subtract from the first row of the
+            stripe when filtering and filling borders.
+  _edelay: The number of fragment rows to subtract from the end row of the
+            stripe when filtering and filling borders.*/
+static void oc_enc_pipeline_finish_mcu_plane(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sdelay,int _edelay){
+  int refi;
+  /*Copy over all the uncoded fragments from this plane and advance the uncoded
+     fragment list.*/
+  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
+  oc_state_frag_copy_list(&_enc->state,_pipe->uncoded_fragis[_pli],
+   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
+  _pipe->nuncoded_fragis[_pli]=0;
+  /*Perform DC prediction.*/
+  oc_enc_pred_dc_frag_rows(_enc,_pli,
+   _pipe->fragy0[_pli],_pipe->fragy_end[_pli]);
+  /*Finish DC tokenization.*/
+  oc_enc_tokenize_dc_frag_list(_enc,_pli,
+   _pipe->coded_fragis[_pli],_pipe->ncoded_fragis[_pli],
+   _pipe->ndct_tokens1[_pli],_pipe->eob_run1[_pli]);
+  /*Remember the zzi=1 token count and EOB run for the next stripe.*/
+  _pipe->ndct_tokens1[_pli]=_enc->ndct_tokens[_pli][1];
+  _pipe->eob_run1[_pli]=_enc->eob_run[_pli][1];
+  /*And advance the coded fragment list.*/
+  _enc->state.ncoded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
+  _pipe->coded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
+  _pipe->ncoded_fragis[_pli]=0;
+  /*Apply the loop filter if necessary.*/
+  refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+  if(_pipe->loop_filter){
+    oc_state_loop_filter_frag_rows(&_enc->state,_pipe->bounding_values,
+     refi,_pli,_pipe->fragy0[_pli]-_sdelay,_pipe->fragy_end[_pli]-_edelay);
+  }
+  /*Without the loop filter there is nothing to wait for.*/
+  else _sdelay=_edelay=0;
+  /*To fill borders, we have an additional two pixel delay, since a fragment
+     in the next row could filter its top edge, using two pixels from a
+     fragment in this row.
+    But there's no reason to delay a full fragment between the two.
+    Note that a-b<<3 parses as (a-b)<<3: the row range is converted from
+     fragments to pixels before the extra two pixels per delayed row are
+     subtracted.*/
+  oc_state_borders_fill_rows(&_enc->state,refi,_pli,
+   (_pipe->fragy0[_pli]-_sdelay<<3)-(_sdelay<<1),
+   (_pipe->fragy_end[_pli]-_edelay<<3)-(_edelay<<1));
+}
+
+
+
+/*Cost information about the coded blocks in a MB.
+  NOTE(review): The field descriptions below are inferred from the field names
+   only; confirm them against the code that fills in this structure.*/
+struct oc_rd_metric{
+  /*Presumably the SSD of the AC coefficients if the blocks are not coded.*/
+  int uncoded_ac_ssd;
+  /*Presumably the SSD of the AC coefficients if the blocks are coded.*/
+  int coded_ac_ssd;
+  /*Presumably the number of bits used by the AC coefficients.*/
+  int ac_bits;
+  /*Presumably a flag describing the DC coefficients; meaning to be
+     confirmed.*/
+  int dc_flag;
+};
+
+
+/*Codes one fragment: forms the prediction residual for its MB mode, applies
+   the forward DCT, quantizes and tokenizes the coefficients, and writes the
+   reconstruction into the OC_FRAME_SELF reference frame.
+  In non-INTRA frames the result is then compared against the fragment's skip
+   SSD, and the block may be rolled back and left uncoded.
+  _enc:           The encoder context.
+  _pipe:          The pipeline state; supplies the quantizer tables, the skip
+                   SSDs and the qii state for the plane.
+  _pli:           The color plane the fragment belongs to.
+  _fragi:         The index of the fragment to code.
+  _overhead_bits: Extra bits charged for coding the block, on top of its AC
+                   tokens; negative values are treated as zero.
+  _mo:            Accumulates the SSD and bit totals of coded blocks.
+                  It is only dereferenced when the frame type is not
+                   OC_INTRA_FRAME.
+  _stack:         The current top of the token checkpoint stack; it is reset
+                   to its value on entry if the block is rolled back.
+  Return: 1 if the fragment was coded, or 0 if it was left uncoded.*/
+static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,ptrdiff_t _fragi,int _overhead_bits,
+ oc_rd_metric *_mo,oc_token_checkpoint **_stack){
+  OC_ALIGN16(ogg_int16_t  dct[64]);
+  OC_ALIGN16(ogg_int16_t  data[64]);
+  ogg_uint16_t            dc_dequant;
+  const ogg_uint16_t     *dequant;
+  const oc_iquant        *enquant;
+  ptrdiff_t               frag_offs;
+  int                     ystride;
+  const unsigned char    *src;
+  const unsigned char    *ref;
+  unsigned char          *dst;
+  int                     frame_type;
+  int                     nonzero;
+  unsigned                uncoded_ssd;
+  unsigned                coded_ssd;
+  int                     coded_dc;
+  oc_token_checkpoint    *checkpoint;
+  oc_fragment            *frags;
+  int                     mb_mode;
+  int                     mv_offs[2];
+  int                     nmv_offs;
+  int                     ac_bits;
+  int                     borderi;
+  int                     qti;
+  int                     qii;
+  int                     pi;
+  int                     zzi;
+  int                     v;
+  int                     val;
+  int                     d;
+  int                     s;
+  int                     dc;
+  frags=_enc->state.frags;
+  frag_offs=_enc->state.frag_buf_offs[_fragi];
+  ystride=_enc->state.ref_ystride[_pli];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO]+frag_offs;
+  borderi=frags[_fragi].borderi;
+  qii=frags[_fragi].qii;
+  if(qii&~3){
+#if !defined(OC_COLLECT_METRICS)
+    /*Enable early skip detection.
+      Any bit set above the low two bits of qii is treated as a request to
+       leave the block uncoded.*/
+    frags[_fragi].coded=0;
+    return 0;
+#else
+    /*Try and code this block anyway.*/
+    qii&=3;
+    frags[_fragi].qii=qii;
+#endif
+  }
+  mb_mode=frags[_fragi].mb_mode;
+  ref=_enc->state.ref_frame_data[
+   _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]+frag_offs;
+  dst=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_SELF]]
+   +frag_offs;
+  /*Motion compensation:
+    data receives the prediction residual.
+    nmv_offs is 0 for INTRA, 1 when a single reference block is used, and
+     otherwise the value returned by oc_state_get_mv_offsets().
+    When more than one offset is returned, the predictor is first assembled
+     in dst.*/
+  switch(mb_mode){
+    case OC_MODE_INTRA:{
+      nmv_offs=0;
+      oc_enc_frag_sub_128(_enc,data,src,ystride);
+    }break;
+    case OC_MODE_GOLDEN_NOMV:
+    case OC_MODE_INTER_NOMV:{
+      nmv_offs=1;
+      mv_offs[0]=0;
+      oc_enc_frag_sub(_enc,data,src,ref,ystride);
+    }break;
+    default:{
+      const oc_mv *frag_mvs;
+      frag_mvs=(const oc_mv *)_enc->state.frag_mvs;
+      nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,_pli,
+       frag_mvs[_fragi][0],frag_mvs[_fragi][1]);
+      if(nmv_offs>1){
+        oc_enc_frag_copy2(_enc,dst,
+         ref+mv_offs[0],ref+mv_offs[1],ystride);
+        oc_enc_frag_sub(_enc,data,src,dst,ystride);
+      }
+      else oc_enc_frag_sub(_enc,data,src,ref+mv_offs[0],ystride);
+    }break;
+  }
+#if defined(OC_COLLECT_METRICS)
+  {
+    unsigned satd;
+    switch(nmv_offs){
+      case 0:satd=oc_enc_frag_intra_satd(_enc,src,ystride);break;
+      case 1:{
+        satd=oc_enc_frag_satd_thresh(_enc,src,ref+mv_offs[0],ystride,UINT_MAX);
+      }break;
+      default:{
+        satd=oc_enc_frag_satd_thresh(_enc,src,dst,ystride,UINT_MAX);
+      }
+    }
+    _enc->frag_satd[_fragi]=satd;
+  }
+#endif
+  /*Transform:*/
+  oc_enc_fdct8x8(_enc,dct,data);
+  /*Quantize the DC coefficient:
+    The DC coefficient always uses the tables for qii 0.*/
+  qti=mb_mode!=OC_MODE_INTRA;
+  enquant=_pipe->enquant[_pli][0][qti];
+  dc_dequant=_pipe->dequant[_pli][0][qti][0];
+  v=dct[0];
+  val=v<<1;
+  s=OC_SIGNMASK(val);
+  val+=dc_dequant+s^s;
+  val=((enquant[0].m*(ogg_int32_t)val>>16)+val>>enquant[0].l)-s;
+  dc=OC_CLAMPI(-580,val,580);
+  nonzero=0;
+  /*Quantize the AC coefficients:
+    Coefficients are read through the OC_FZIG_ZAG table, and nonzero ends up
+     as the index of the last one that did not quantize to zero.*/
+  dequant=_pipe->dequant[_pli][qii][qti];
+  enquant=_pipe->enquant[_pli][qii][qti];
+  for(zzi=1;zzi<64;zzi++){
+    v=dct[OC_FZIG_ZAG[zzi]];
+    d=dequant[zzi];
+    val=v<<1;
+    v=abs(val);
+    if(v>=d){
+      s=OC_SIGNMASK(val);
+      /*The bias added here rounds ties away from zero, since token
+         optimization can only decrease the magnitude of the quantized
+         value.*/
+      val+=d+s^s;
+      /*Note the arithmetic right shift is not guaranteed by ANSI C.
+        Hopefully no one still uses ones-complement architectures.*/
+      val=((enquant[zzi].m*(ogg_int32_t)val>>16)+val>>enquant[zzi].l)-s;
+      data[zzi]=OC_CLAMPI(-580,val,580);
+      nonzero=zzi;
+    }
+    else data[zzi]=0;
+  }
+  /*Tokenize.
+    The stack position is saved first so the tokens can be rolled back
+     below.*/
+  checkpoint=*_stack;
+  ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,data,dequant,dct,nonzero+1,
+   _stack,qti?0:3);
+  /*Reconstruct.
+    With no non-zero AC coefficients every pixel gets the same rounded DC
+     value and the iDCT is skipped.
+    TODO: nonzero may need to be adjusted after tokenization.*/
+  if(nonzero==0){
+    ogg_int16_t p;
+    int         ci;
+    /*We round this dequant product (and not any of the others) because there's
+       no iDCT rounding.*/
+    p=(ogg_int16_t)(dc*(ogg_int32_t)dc_dequant+15>>5);
+    /*LOOP VECTORIZES.*/
+    for(ci=0;ci<64;ci++)data[ci]=p;
+  }
+  else{
+    data[0]=dc*dc_dequant;
+    oc_idct8x8(&_enc->state,data,nonzero+1);
+  }
+  if(!qti)oc_enc_frag_recon_intra(_enc,dst,ystride,data);
+  else{
+    oc_enc_frag_recon_inter(_enc,dst,
+     nmv_offs==1?ref+mv_offs[0]:dst,ystride,data);
+  }
+  frame_type=_enc->state.frame_type;
+#if !defined(OC_COLLECT_METRICS)
+  if(frame_type!=OC_INTRA_FRAME)
+#endif
+  {
+    /*In retrospect, should we have skipped this block?
+      data is reused to hold the difference between the source and the
+       reconstruction.*/
+    oc_enc_frag_sub(_enc,data,src,dst,ystride);
+    coded_ssd=coded_dc=0;
+    if(borderi<0){
+      for(pi=0;pi<64;pi++){
+        coded_ssd+=data[pi]*data[pi];
+        coded_dc+=data[pi];
+      }
+    }
+    else{
+      ogg_int64_t mask;
+      mask=_enc->state.borders[borderi].mask;
+      for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
+        coded_ssd+=data[pi]*data[pi];
+        coded_dc+=data[pi];
+      }
+    }
+    /*Scale to match DCT domain.*/
+    coded_ssd<<=4;
+    /*We actually only want the AC contribution to the SSD.*/
+    coded_ssd-=coded_dc*coded_dc>>2;
+#if defined(OC_COLLECT_METRICS)
+    _enc->frag_ssd[_fragi]=coded_ssd;
+  }
+  if(frame_type!=OC_INTRA_FRAME){
+#endif
+    uncoded_ssd=_pipe->skip_ssd[_pli][_fragi-_pipe->froffset[_pli]];
+    if(uncoded_ssd<UINT_MAX){
+      /*Although the fragment coding overhead determination is accurate, it is
+         greedy, using very coarse-grained local information.
+        Allowing it to mildly discourage coding turns out to be beneficial, but
+         it's not clear that allowing it to encourage coding through negative
+         coding overhead deltas is useful.
+        For that reason, we disallow negative coding_overheads.*/
+      if(_overhead_bits<0)_overhead_bits=0;
+      if(uncoded_ssd<=coded_ssd+(_overhead_bits+ac_bits)*_enc->lambda&&
+       /*Don't allow luma blocks to be skipped in 4MV mode when VP3
+          compatibility is enabled.*/
+       (!_enc->vp3_compatible||mb_mode!=OC_MODE_INTER_MV_FOUR||_pli)){
+        /*Hm, not worth it; roll back.*/
+        oc_enc_tokenlog_rollback(_enc,checkpoint,(*_stack)-checkpoint);
+        *_stack=checkpoint;
+        frags[_fragi].coded=0;
+        return 0;
+      }
+    }
+    else _mo->dc_flag=1;
+    _mo->uncoded_ac_ssd+=uncoded_ssd;
+    _mo->coded_ac_ssd+=coded_ssd;
+    _mo->ac_bits+=ac_bits;
+  }
+  oc_qii_state_advance(_pipe->qs+_pli,_pipe->qs+_pli,qii);
+  frags[_fragi].dc=dc;
+  frags[_fragi].coded=1;
+  return 1;
+}
+
+/*Codes the four luma blocks of a macroblock, then reconsiders the result for
+   the macroblock as a whole.
+  Each block is handed to oc_enc_block_transform_quantize() and appended to
+   either the coded or the uncoded luma fragment list of the pipeline.
+  In non-INTRA frames, if coding the blocks does not pay for itself once the
+   block flag cost and _mode_overhead are charged, every coded block is
+   rolled back and the whole MB is left uncoded.
+  Afterwards the MB mode is forced to OC_MODE_INTER_NOMV if nothing was coded,
+   and an OC_MODE_INTER_MV_FOUR MB with exactly one coded block is demoted to
+   OC_MODE_INTER_MV.
+  _enc:           The encoder context.
+  _pipe:          The pipeline state for the current stripe.
+  _mbi:           The index of the macroblock.
+  _mode_overhead: The bits charged for the mode and MV overhead.
+  Return: The number of luma blocks that remain coded.*/
+static int oc_enc_mb_transform_quantize_luma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,unsigned _mbi,int _mode_overhead){
+  /*Large enough for the worst case token stack usage of 4 fragments.*/
+  oc_token_checkpoint  checkpoints[64*4];
+  oc_token_checkpoint *top;
+  const ptrdiff_t     *mb_fragis;
+  signed char         *mb_modes;
+  oc_fragment         *frags;
+  ptrdiff_t           *coded_fragis;
+  ptrdiff_t           *uncoded_fragis;
+  ptrdiff_t            ncoded_fragis;
+  ptrdiff_t            nuncoded_fragis;
+  oc_rd_metric         mo;
+  oc_fr_state          fr_saved;
+  oc_qii_state         qs_saved;
+  ptrdiff_t            fragi;
+  int                  mb_mode;
+  int                  ncoded;
+  int                  bi;
+  /*Remember the flag and quantizer state so the whole MB can be undone.*/
+  fr_saved=_pipe->fr[0];
+  qs_saved=_pipe->qs[0];
+  mb_fragis=((const oc_sb_map *)_enc->state.sb_maps)[_mbi>>2][_mbi&3];
+  mb_modes=_enc->state.mb_modes;
+  frags=_enc->state.frags;
+  coded_fragis=_pipe->coded_fragis[0];
+  ncoded_fragis=_pipe->ncoded_fragis[0];
+  uncoded_fragis=_pipe->uncoded_fragis[0];
+  nuncoded_fragis=_pipe->nuncoded_fragis[0];
+  mb_mode=mb_modes[_mbi];
+  ncoded=0;
+  top=checkpoints;
+  memset(&mo,0,sizeof(mo));
+  for(bi=0;bi<4;bi++){
+    fragi=mb_fragis[bi];
+    frags[fragi].mb_mode=mb_mode;
+    if(!oc_enc_block_transform_quantize(_enc,
+     _pipe,0,fragi,oc_fr_cost1(_pipe->fr+0),&mo,&top)){
+      /*The uncoded list grows downwards from uncoded_fragis.*/
+      nuncoded_fragis++;
+      uncoded_fragis[-nuncoded_fragis]=fragi;
+      oc_fr_skip_block(_pipe->fr+0);
+      continue;
+    }
+    oc_fr_code_block(_pipe->fr+0);
+    coded_fragis[ncoded_fragis]=fragi;
+    ncoded_fragis++;
+    ncoded++;
+  }
+  if(_enc->state.frame_type!=OC_INTRA_FRAME){
+    if(ncoded>0&&!mo.dc_flag){
+      int cost;
+      /*Individual blocks looked worth coding; check whether that still holds
+         once the mode and MV overhead are included.*/
+      cost=mo.coded_ac_ssd+_enc->lambda*(mo.ac_bits
+       +oc_fr_cost4(&fr_saved,_pipe->fr+0)+_mode_overhead);
+      if(mo.uncoded_ac_ssd<=cost){
+        /*With the macroblock overhead counted, coding this MB does not pay
+           off: undo all of its tokens and state changes.*/
+        oc_enc_tokenlog_rollback(_enc,checkpoints,top-checkpoints);
+        _pipe->fr[0]=fr_saved;
+        _pipe->qs[0]=qs_saved;
+        for(bi=0;bi<4;bi++){
+          fragi=mb_fragis[bi];
+          if(frags[fragi].coded){
+            nuncoded_fragis++;
+            uncoded_fragis[-nuncoded_fragis]=fragi;
+            frags[fragi].coded=0;
+          }
+          oc_fr_skip_block(_pipe->fr+0);
+        }
+        ncoded_fragis-=ncoded;
+        ncoded=0;
+      }
+    }
+    switch(ncoded){
+      /*With no luma blocks coded, the mode is forced.*/
+      case 0:mb_modes[_mbi]=OC_MODE_INTER_NOMV;break;
+      /*A 1MV with a single coded block is assumed to always be cheaper than
+         a 4MV with a single coded block.
+        That may not be strictly true: a 4MV derives its chroma MVs using
+         (0,0) for skipped blocks, while a 1MV does not.*/
+      case 1:{
+        if(mb_mode==OC_MODE_INTER_MV_FOUR)mb_modes[_mbi]=OC_MODE_INTER_MV;
+      }break;
+      default:break;
+    }
+  }
+  _pipe->ncoded_fragis[0]=ncoded_fragis;
+  _pipe->nuncoded_fragis[0]=nuncoded_fragis;
+  return ncoded;
+}
+
+/*Codes every chroma fragment of a range of super blocks in one plane.
+  Each valid fragment is handed to oc_enc_block_transform_quantize() and
+   appended to either the coded or the uncoded fragment list of the plane.
+  After each super block the flag state is flushed and the super block's
+   coded_fully/coded_partially flags are recorded.
+  _enc:       The encoder context.
+  _pipe:      The pipeline state for the current stripe.
+  _pli:       The color plane to code.
+  _sbi_start: The index of the first super block to code.
+  _sbi_end:   The index one past the last super block to code.*/
+static void oc_enc_sb_transform_quantize_chroma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){
+  const oc_sb_map *sb_maps;
+  oc_sb_flags     *sb_flags;
+  oc_fr_state     *fr;
+  ptrdiff_t       *coded_fragis;
+  ptrdiff_t       *uncoded_fragis;
+  ptrdiff_t        ncoded_fragis;
+  ptrdiff_t        nuncoded_fragis;
+  int              sbi;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  sb_flags=_enc->state.sb_flags;
+  fr=_pipe->fr+_pli;
+  coded_fragis=_pipe->coded_fragis[_pli];
+  ncoded_fragis=_pipe->ncoded_fragis[_pli];
+  uncoded_fragis=_pipe->uncoded_fragis[_pli];
+  nuncoded_fragis=_pipe->nuncoded_fragis[_pli];
+  for(sbi=_sbi_start;sbi<_sbi_end;sbi++){
+    /*Large enough for the worst case token stack usage of 1 fragment.*/
+    oc_token_checkpoint stack[64];
+    oc_rd_metric        mo;
+    int                 blki;
+    memset(&mo,0,sizeof(mo));
+    /*Walk the 4 quadrants of 4 blocks each as one run of 16 entries.*/
+    for(blki=0;blki<16;blki++){
+      oc_token_checkpoint *stackptr;
+      ptrdiff_t            fragi;
+      fragi=sb_maps[sbi][blki>>2][blki&3];
+      /*Negative entries mark blocks that do not exist in this super block.*/
+      if(fragi<0)continue;
+      stackptr=stack;
+      if(oc_enc_block_transform_quantize(_enc,
+       _pipe,_pli,fragi,oc_fr_cost1(fr),&mo,&stackptr)){
+        coded_fragis[ncoded_fragis]=fragi;
+        ncoded_fragis++;
+        oc_fr_code_block(fr);
+      }
+      else{
+        /*The uncoded list grows downwards from uncoded_fragis.*/
+        nuncoded_fragis++;
+        uncoded_fragis[-nuncoded_fragis]=fragi;
+        oc_fr_skip_block(fr);
+      }
+    }
+    oc_fr_state_flush_sb(fr);
+    sb_flags[sbi].coded_fully=fr->sb_full;
+    sb_flags[sbi].coded_partially=fr->sb_partial;
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
+  _pipe->nuncoded_fragis[_pli]=nuncoded_fragis;
+}
+
+/*Mode decision is done by exhaustively examining all potential choices.
+  Obviously, doing the motion compensation, fDCT, tokenization, and then
+   counting the bits each token uses is computationally expensive.
+  Theora's EOB runs can also split the cost of these tokens across multiple
+   fragments, and naturally we don't know what the optimal choice of Huffman
+   codes will be until we know all the tokens we're going to encode in all the
+   fragments.
+  So we use a simple approach to estimating the bit cost and distortion of each
+   mode based upon the SATD value of the residual before coding.
+  The mathematics behind the technique are outlined by Kim \cite{Kim03}, but
+   the process (modified somewhat from that of the paper) is very simple.
+  We build a non-linear regression of the mappings from
+   (pre-transform+quantization) SATD to (post-transform+quantization) bits and
+   SSD for each qi.
+  A separate set of mappings is kept for each quantization type and color
+   plane.
+  The mappings are constructed by partitioning the SATD values into a small
+   number of bins (currently 24) and using a linear regression in each bin
+   (as opposed to the 0th-order regression used by Kim).
+  The bit counts and SSD measurements are obtained by examining actual encoded
+   frames, with appropriate lambda values and optimal Huffman codes selected.
+  EOB bits are assigned to the fragment that started the EOB run (as opposed to
+   dividing them among all the blocks in the run; though the latter approach
+   seems more theoretically correct, Monty's testing showed a small improvement
+   with the former, though that may have been merely statistical noise).
+
+  @ARTICLE{Kim03,
+    author="Hyun Mun Kim",
+    title="Adaptive Rate Control Using Nonlinear Regression",
+    journal="IEEE Transactions on Circuits and Systems for Video Technology",
+    volume=13,
+    number=5,
+    pages="432--439",
+    month=May,
+    year=2003
+  }*/
+
+/*Computes (_ssd+_lambda*_rate)/(1<<OC_BIT_SCALE) with rounding, avoiding
+   overflow for large lambda values.
+  The whole and fractional parts of _ssd and _rate are handled separately, so
+   that only the fractional part of _rate is multiplied by _lambda before the
+   final shift.
+  The entire expansion is parenthesized so that the macro can be used safely
+   as an operand of a larger expression.
+  Every argument is evaluated twice, so arguments must not have side
+   effects.*/
+#define OC_MODE_RD_COST(_ssd,_rate,_lambda) \
+ (((_ssd)>>OC_BIT_SCALE)+((_rate)>>OC_BIT_SCALE)*(_lambda) \
+ +(((_ssd)&(1<<OC_BIT_SCALE)-1)+((_rate)&(1<<OC_BIT_SCALE)-1)*(_lambda) \
+ +((1<<OC_BIT_SCALE)>>1)>>OC_BIT_SCALE))
+
+/*Estimates the rate and distortion of coding the DCT coefficients of a block,
+   given the SATD of the block after prediction.
+  Both values are interpolated linearly between the two OC_MODE_RD table
+   entries that bracket the SATD, and are clamped to be non-negative.
+  _ssd:  Returns the estimated distortion, scaled by 1<<OC_BIT_SCALE.
+  _qi:   The quantization index selecting the table.
+  _pli:  The color plane of the block.
+  _qti:  The quantization type of the block.
+  _satd: The SATD of the prediction residual.
+  Return: The estimated rate, scaled by 1<<OC_BIT_SCALE.*/
+static unsigned oc_dct_cost2(unsigned *_ssd,
+ int _qi,int _pli,int _qti,int _satd){
+  unsigned rmse;
+  int      rate_lo;
+  int      rate_step;
+  int      rmse_lo;
+  int      rmse_step;
+  int      bin;
+  int      offs;
+  /*Chroma SATD values vary much less than luma ones, so they are multiplied
+     by 4 to spread them more evenly over the mode decision bins.
+    The shift amount is 0 for plane 0 and 2 for planes 1 and 2.*/
+  _satd<<=(_pli+1)&2;
+  /*The last usable bin is OC_SAD_BINS-2, since bin+1 is read below.*/
+  bin=OC_MINI(_satd>>OC_SAD_SHIFT,OC_SAD_BINS-2);
+  offs=_satd-(bin<<OC_SAD_SHIFT);
+  rate_lo=OC_MODE_RD[_qi][_pli][_qti][bin].rate;
+  rate_step=OC_MODE_RD[_qi][_pli][_qti][bin+1].rate-rate_lo;
+  rmse_lo=OC_MODE_RD[_qi][_pli][_qti][bin].rmse;
+  rmse_step=OC_MODE_RD[_qi][_pli][_qti][bin+1].rmse-rmse_lo;
+  rmse=OC_MAXI(rmse_lo+((rmse_step*offs)>>OC_SAD_SHIFT),0);
+  *_ssd=(rmse*rmse)>>((2*OC_RMSE_SCALE)-OC_BIT_SCALE);
+  return OC_MAXI(rate_lo+((rate_step*offs)>>OC_SAD_SHIFT),0);
+}
+
+/*Selects the block-level quantizers for the four luma blocks of a MB in an
+   INTRA frame.
+  For each block and each candidate quantizer index, the cheapest chain of
+   choices for the preceding blocks that leads to it is recorded; the
+   cheapest chain ending at the last block is then traced backwards and
+   stored in the qii field of the four fragments.
+  _enc: The encoder context.
+  _qs:  The quantizer index coding state before this MB.
+  _mbi: The index of the macroblock.
+  Return: The estimated R-D cost of the selected chain.*/
+static unsigned oc_analyze_intra_mb_luma(oc_enc_ctx *_enc,
+ const oc_qii_state *_qs,unsigned _mbi){
+  const unsigned char *src;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *mb_fragis;
+  oc_fragment         *frags;
+  oc_qii_state         qs[4][3];
+  unsigned             cost[4][3];
+  unsigned             ssd[4][3];
+  unsigned             rate[4][3];
+  int                  prev[3][3];
+  unsigned             satd;
+  unsigned             best_cost;
+  int                  best_qii;
+  int                  lambda;
+  int                  ystride;
+  int                  nqis;
+  int                  qii;
+  int                  bi;
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  mb_fragis=((const oc_sb_map *)_enc->state.sb_maps)[_mbi>>2][_mbi&3];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ystride=_enc->state.ref_ystride[0];
+  nqis=_enc->state.nqis;
+  lambda=_enc->lambda;
+  /*The first block: every candidate starts from the incoming state.*/
+  satd=oc_enc_frag_intra_satd(_enc,src+frag_buf_offs[mb_fragis[0]],ystride);
+  for(qii=0;qii<nqis;qii++){
+    oc_qii_state_advance(&qs[0][qii],_qs,qii);
+    rate[0][qii]=oc_dct_cost2(&ssd[0][qii],_enc->state.qis[qii],0,0,satd)
+     +((qs[0][qii].bits-_qs->bits)<<OC_BIT_SCALE);
+    cost[0][qii]=OC_MODE_RD_COST(ssd[0][qii],rate[0][qii],lambda);
+  }
+  /*The remaining blocks: for each candidate, keep the cheapest predecessor.*/
+  for(bi=1;bi<4;bi++){
+    satd=oc_enc_frag_intra_satd(_enc,
+     src+frag_buf_offs[mb_fragis[bi]],ystride);
+    for(qii=0;qii<nqis;qii++){
+      oc_qii_state qt[3];
+      unsigned     blk_ssd;
+      unsigned     blk_rate;
+      unsigned     win_ssd;
+      unsigned     win_rate;
+      unsigned     win_cost;
+      int          win_qij;
+      int          qij;
+      blk_rate=oc_dct_cost2(&blk_ssd,_enc->state.qis[qii],0,0,satd);
+      win_ssd=win_rate=win_cost=0;
+      win_qij=-1;
+      for(qij=0;qij<nqis;qij++){
+        unsigned path_ssd;
+        unsigned path_rate;
+        unsigned path_cost;
+        oc_qii_state_advance(&qt[qij],&qs[bi-1][qij],qii);
+        path_ssd=ssd[bi-1][qij]+blk_ssd;
+        path_rate=rate[bi-1][qij]+blk_rate
+         +((qt[qij].bits-qs[bi-1][qij].bits)<<OC_BIT_SCALE);
+        path_cost=OC_MODE_RD_COST(path_ssd,path_rate,lambda);
+        /*The first predecessor is always taken; later ones must be strictly
+           cheaper to replace it.*/
+        if(win_qij<0||path_cost<win_cost){
+          win_cost=path_cost;
+          win_ssd=path_ssd;
+          win_rate=path_rate;
+          win_qij=qij;
+        }
+      }
+      qs[bi][qii]=qt[win_qij];
+      cost[bi][qii]=win_cost;
+      ssd[bi][qii]=win_ssd;
+      rate[bi][qii]=win_rate;
+      prev[bi-1][qii]=win_qij;
+    }
+  }
+  /*Pick the cheapest end point; ties go to the lowest index.*/
+  best_qii=0;
+  best_cost=cost[3][0];
+  for(qii=1;qii<nqis;qii++){
+    if(cost[3][qii]>=best_cost)continue;
+    best_cost=cost[3][qii];
+    best_qii=qii;
+  }
+  /*Trace the chain back from the last block to the first.*/
+  frags=_enc->state.frags;
+  for(bi=3;bi>=0;bi--){
+    frags[mb_fragis[bi]].qii=best_qii;
+    if(bi>0)best_qii=prev[bi-1][best_qii];
+  }
+  return best_cost;
+}
+
+/*Selects the block-level quantizer for a single chroma block in an INTRA
+   frame.
+  Every candidate quantizer index is costed from the block's intra SATD plus
+   the bits needed to signal it, and the cheapest one (the lowest index on a
+   tie) is stored in the qii field of the fragment.
+  _enc:   The encoder context.
+  _qs:    The quantizer index coding state before this block.
+  _pli:   The color plane of the block.
+  _fragi: The index of the fragment.
+  Return: The estimated R-D cost of the selected quantizer.*/
+static unsigned oc_analyze_intra_chroma_block(oc_enc_ctx *_enc,
+ const oc_qii_state *_qs,int _pli,ptrdiff_t _fragi){
+  const unsigned char *blk;
+  unsigned             satd;
+  unsigned             best_cost;
+  int                  best_qii;
+  int                  lambda;
+  int                  ystride;
+  int                  nqis;
+  int                  qii;
+  blk=_enc->state.ref_frame_data[OC_FRAME_IO]
+   +_enc->state.frag_buf_offs[_fragi];
+  ystride=_enc->state.ref_ystride[_pli];
+  satd=oc_enc_frag_intra_satd(_enc,blk,ystride);
+  nqis=_enc->state.nqis;
+  lambda=_enc->lambda;
+  best_cost=0;
+  best_qii=0;
+  for(qii=0;qii<nqis;qii++){
+    oc_qii_state qt;
+    unsigned     cur_ssd;
+    unsigned     cur_rate;
+    unsigned     cur_cost;
+    oc_qii_state_advance(&qt,_qs,qii);
+    cur_rate=oc_dct_cost2(&cur_ssd,_enc->state.qis[qii],_pli,0,satd)
+     +((qt.bits-_qs->bits)<<OC_BIT_SCALE);
+    cur_cost=OC_MODE_RD_COST(cur_ssd,cur_rate,lambda);
+    /*The first candidate is always taken; later ones must be strictly
+       cheaper to replace it.*/
+    if(qii==0||cur_cost<best_cost){
+      best_cost=cur_cost;
+      best_qii=qii;
+    }
+  }
+  _enc->state.frags[_fragi].qii=best_qii;
+  return best_cost;
+}
+
+/*Codes every chroma fragment of a range of super blocks in one plane of an
+   INTRA frame.
+  For each valid fragment a block-level quantizer is selected, and the
+   fragment is then transformed, quantized, tokenized and appended to the
+   coded fragment list of the plane.
+  This must only be called while the frame type is OC_INTRA_FRAME: NULL is
+   passed for the metrics accumulator, which
+   oc_enc_block_transform_quantize() dereferences for any other frame type.
+  _enc:       The encoder context.
+  _pipe:      The pipeline state for the current stripe.
+  _pli:       The color plane to code.
+  _sbi_start: The index of the first super block to code.
+  _sbi_end:   The index one past the last super block to code.*/
+static void oc_enc_sb_transform_quantize_intra_chroma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){
+  const oc_sb_map *sb_maps;
+  ptrdiff_t       *coded_fragis;
+  ptrdiff_t        ncoded_fragis;
+  int              sbi;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  coded_fragis=_pipe->coded_fragis[_pli];
+  ncoded_fragis=_pipe->ncoded_fragis[_pli];
+  for(sbi=_sbi_start;sbi<_sbi_end;sbi++){
+    /*Worst case token stack usage for 1 fragment.*/
+    oc_token_checkpoint stack[64];
+    int                 quadi;
+    int                 bi;
+    for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){
+      ptrdiff_t fragi;
+      fragi=sb_maps[sbi][quadi][bi];
+      /*Negative entries mark blocks that do not exist in this super block.*/
+      if(fragi>=0){
+        oc_token_checkpoint *stackptr;
+        oc_analyze_intra_chroma_block(_enc,_pipe->qs+_pli,_pli,fragi);
+        stackptr=stack;
+        /*The return value is not checked; the fragment is always added to
+           the coded list.*/
+        oc_enc_block_transform_quantize(_enc,
+         _pipe,_pli,fragi,0,NULL,&stackptr);
+        coded_fragis[ncoded_fragis++]=fragi;
+      }
+    }
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
+}
+
+/*Analysis stage for an INTRA frame.
+  Every valid macroblock is given mode OC_MODE_INTRA; block-level quantizers
+   are selected, and all three planes are transformed, quantized and tokenized
+   one MCU stripe at a time.
+  _enc:    The encoder context.
+  _recode: If non-zero, the motion search is skipped.
+           It is also skipped when curframe_num is not greater than zero.*/
+void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode){
+  oc_enc_pipeline_state   pipe;
+  const unsigned char    *map_idxs;
+  int                     nmap_idxs;
+  oc_sb_flags            *sb_flags;
+  signed char            *mb_modes;
+  const oc_mb_map        *mb_maps;
+  oc_fragment            *frags;
+  unsigned                stripe_sby;
+  unsigned                mcu_nvsbs;
+  int                     notstart;
+  int                     notdone;
+  int                     refi;
+  int                     pli;
+  _enc->state.frame_type=OC_INTRA_FRAME;
+  oc_enc_tokenize_start(_enc);
+  oc_enc_pipeline_init(_enc,&pipe);
+  /*Select quantizers, then quantize and code luma.
+    Must be done in Hilbert order.*/
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  _enc->state.ncoded_fragis[0]=0;
+  _enc->state.ncoded_fragis[1]=0;
+  _enc->state.ncoded_fragis[2]=0;
+  sb_flags=_enc->state.sb_flags;
+  mb_modes=_enc->state.mb_modes;
+  mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+  frags=_enc->state.frags;
+  notstart=0;
+  notdone=1;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){
+    unsigned sbi;
+    unsigned sbi_end;
+    notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby);
+    sbi_end=pipe.sbi_end[0];
+    for(sbi=pipe.sbi0[0];sbi<sbi_end;sbi++){
+      int quadi;
+      /*Mode addressing is through Y plane, always 4 MB per SB.*/
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
+        unsigned  mbi;
+        int       mapii;
+        int       mapi;
+        int       bi;
+        ptrdiff_t fragi;
+        mbi=sbi<<2|quadi;
+        /*Motion estimation:
+          We always do a basic 1MV search for all macroblocks, coded or not,
+           keyframe or not.*/
+        if(!_recode&&_enc->state.curframe_num>0)oc_mcenc_search(_enc,mbi);
+        oc_analyze_intra_mb_luma(_enc,pipe.qs+0,mbi);
+        mb_modes[mbi]=OC_MODE_INTRA;
+        oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,0);
+        /*Propagate the MB mode to the chroma blocks.
+          The first four map indices are the luma blocks, so start at 4.*/
+        for(mapii=4;mapii<nmap_idxs;mapii++){
+          mapi=map_idxs[mapii];
+          pli=mapi>>2;
+          bi=mapi&3;
+          fragi=mb_maps[mbi][pli][bi];
+          frags[fragi].mb_mode=OC_MODE_INTRA;
+        }
+      }
+    }
+    oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone);
+    /*Code chroma planes.*/
+    for(pli=1;pli<3;pli++){
+      oc_enc_sb_transform_quantize_intra_chroma(_enc,&pipe,
+       pli,pipe.sbi0[pli],pipe.sbi_end[pli]);
+      oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone);
+    }
+    notstart=1;
+  }
+  /*Finish filling in the reference frame borders.*/
+  refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+  for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli);
+  /*Every fragment of an INTRA frame is coded.*/
+  _enc->state.ntotal_coded_fragis=_enc->state.nfrags;
+}
+
+
+
+/*Cost information about a MB mode.
+  oc_analyze_mb_mode_luma() and oc_analyze_mb_mode_chroma() fill in the
+   totals and the per-block choices, and oc_mode_set_cost() derives cost from
+   them.*/
+struct oc_mode_choice{
+  unsigned      cost; /*R-D cost computed from ssd and rate+overhead.*/
+  unsigned      ssd; /*Estimated distortion, summed over the blocks.*/
+  unsigned      rate; /*Estimated coefficient rate, summed over the blocks.*/
+  unsigned      overhead; /*Flag overhead from luma analysis, at least 0.*/
+  unsigned char qii[12]; /*Per-block quantizer index, plus 4 to mark a skip.*/
+};
+
+
+
+/*Fills in the cost of a mode choice from its distortion and its total rate
+   (coefficient rate plus overhead).
+  _modec:  The mode choice to update.
+  _lambda: The Lagrange multiplier applied to the rate.*/
+static void oc_mode_set_cost(oc_mode_choice *_modec,int _lambda){
+  unsigned total_rate;
+  total_rate=_modec->rate+_modec->overhead;
+  _modec->cost=OC_MODE_RD_COST(_modec->ssd,total_rate,_lambda);
+}
+
+/*A set of skip SSDs, one for each of up to 12 blocks, that disables early
+   skipping.
+  The mode analysis only considers skipping a block whose skip SSD is less
+   than UINT_MAX.*/
+static const unsigned OC_NOSKIP[12]={
+  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,
+  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX
+};
+
+/*The estimated number of bits a coded chroma block uses to specify its AC
+   quantizer, scaled by 1<<OC_BIT_SCALE and rounded to nearest.
+  TODO: At present this is simply 0.5*log2(3) (i.e., assuming roughly 50%
+   compression); measurements suggest that is in the right ballpark, but it
+   varies somewhat with lambda.*/
+#define OC_CHROMA_QII_RATE ((((0xCAE00D1DU)>>(31-OC_BIT_SCALE))+1)>>1)
+
+/*Estimates the cost of coding the four luma blocks of a MB in a given mode,
+   choosing for each block in turn whether to skip it and, if not, which
+   quantizer index to use.
+  _enc:       The encoder context.
+  _modec:     Receives the ssd, rate and overhead totals and the choice made
+               for each of the four blocks in qii[0..3] (with 4 added for a
+               block that should be skipped).
+  _fr:        The coded block flag state before this MB.
+  _qs:        The quantizer index coding state before this MB.
+  _frag_satd: The SATD of each block's prediction residual.
+  _skip_ssd:  The skip SSD of each block; UINT_MAX forbids skipping it.
+  _qti:       The quantization type of the mode.*/
+static void oc_analyze_mb_mode_luma(oc_enc_ctx *_enc,
+ oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){
+  oc_fr_state  fr;
+  oc_qii_state qs;
+  unsigned     total_ssd;
+  unsigned     total_rate;
+  int          total_overhead;
+  int          lambda;
+  int          nqis;
+  int          nskipped;
+  int          bi;
+  lambda=_enc->lambda;
+  nqis=_enc->state.nqis;
+  /*A trellis optimization would be possible here, but the final skip
+     decisions are not made until after transform+quantization, so its result
+     would not be optimal anyway.
+    A greedy approach is used instead; for most SATD values the differences
+     between the qiis are large enough to drown out the cost of coding the
+     flags.*/
+  fr=*_fr;
+  qs=*_qs;
+  total_ssd=0;
+  total_rate=0;
+  total_overhead=0;
+  nskipped=0;
+  for(bi=0;bi<4;bi++){
+    oc_fr_state  fr_coded;
+    oc_fr_state  fr_skipped;
+    oc_qii_state qt[3];
+    unsigned     satd;
+    unsigned     best_cost;
+    unsigned     best_ssd;
+    unsigned     best_rate;
+    int          best_overhead;
+    int          best_qii;
+    int          skip;
+    int          qii;
+    satd=_frag_satd[bi];
+    /*Start from coding the block with quantizer index 0.*/
+    fr_coded=fr;
+    oc_fr_code_block(&fr_coded);
+    best_overhead=(fr_coded.bits-fr.bits)<<OC_BIT_SCALE;
+    oc_qii_state_advance(qt+0,&qs,0);
+    best_rate=oc_dct_cost2(&best_ssd,_enc->state.qis[0],0,_qti,satd)
+     +((qt[0].bits-qs.bits)<<OC_BIT_SCALE);
+    best_cost=OC_MODE_RD_COST(total_ssd+best_ssd,
+     total_rate+best_rate+best_overhead,lambda);
+    best_qii=0;
+    /*Then see if any other quantizer index is strictly cheaper.*/
+    for(qii=1;qii<nqis;qii++){
+      unsigned cand_ssd;
+      unsigned cand_rate;
+      unsigned cand_cost;
+      oc_qii_state_advance(qt+qii,&qs,qii);
+      cand_rate=oc_dct_cost2(&cand_ssd,_enc->state.qis[qii],0,_qti,satd)
+       +((qt[qii].bits-qs.bits)<<OC_BIT_SCALE);
+      cand_cost=OC_MODE_RD_COST(total_ssd+cand_ssd,
+       total_rate+cand_rate+best_overhead,lambda);
+      if(cand_cost>=best_cost)continue;
+      best_cost=cand_cost;
+      best_ssd=cand_ssd;
+      best_rate=cand_rate;
+      best_qii=qii;
+    }
+    /*Finally consider skipping the block, unless skipping is disabled for it
+       or three blocks have been skipped already.*/
+    skip=0;
+    if(_skip_ssd[bi]<UINT_MAX&&nskipped<3){
+      unsigned skip_ssd;
+      unsigned skip_cost;
+      int      skip_overhead;
+      fr_skipped=fr;
+      oc_fr_skip_block(&fr_skipped);
+      skip_overhead=(fr_skipped.bits-fr.bits)<<OC_BIT_SCALE;
+      skip_ssd=_skip_ssd[bi]<<OC_BIT_SCALE;
+      skip_cost=OC_MODE_RD_COST(total_ssd+skip_ssd,
+       total_rate+skip_overhead,lambda);
+      if(skip_cost<=best_cost){
+        best_ssd=skip_ssd;
+        best_rate=0;
+        best_overhead=skip_overhead;
+        /*Adding 4 to the quantizer index marks the block as skipped.*/
+        best_qii+=4;
+        skip=1;
+      }
+    }
+    total_rate+=best_rate;
+    total_ssd+=best_ssd;
+    total_overhead+=best_overhead;
+    if(skip){
+      fr=fr_skipped;
+      nskipped++;
+    }
+    else{
+      fr=fr_coded;
+      qs=qt[best_qii];
+    }
+    _modec->qii[bi]=best_qii;
+  }
+  _modec->ssd=total_ssd;
+  _modec->rate=total_rate;
+  _modec->overhead=OC_MAXI(total_overhead,0);
+}
+
+/*Adds the estimated cost of coding the chroma blocks of a MB in a given mode
+   to the totals already accumulated for its luma blocks, choosing for each
+   block whether to skip it and, if not, which quantizer index to use.
+  Every candidate quantizer index is costed with the rate/distortion model of
+   the chroma plane the block belongs to.
+  _enc:       The encoder context.
+  _modec:     On entry, holds the ssd and rate totals for the luma blocks.
+              On exit, the totals include the chroma blocks, and qii[4..]
+               holds the choice made for each chroma block (with 4 added for
+               a block that should be skipped).
+  _fr:        Unused.
+  _qs:        Unused.
+  _frag_satd: The SATD of each block's prediction residual.
+  _skip_ssd:  The skip SSD of each block; UINT_MAX forbids skipping it.
+  _qti:       The quantization type of the mode.*/
+static void oc_analyze_mb_mode_chroma(oc_enc_ctx *_enc,
+ oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){
+  unsigned ssd;
+  unsigned rate;
+  unsigned satd;
+  unsigned best_ssd;
+  unsigned best_rate;
+  int      best_qii;
+  unsigned cur_cost;
+  unsigned cur_ssd;
+  unsigned cur_rate;
+  int      lambda;
+  int      nblocks;
+  int      nqis;
+  int      pli;
+  int      bi;
+  int      qii;
+  lambda=_enc->lambda;
+  nqis=_enc->state.nqis;
+  ssd=_modec->ssd;
+  rate=_modec->rate;
+  /*Because (except in 4:4:4 mode) we aren't considering chroma blocks in coded
+     order, we assume a constant overhead for coded block and qii flags.*/
+  nblocks=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*The chroma blocks are split evenly between the two planes: nblocks is
+     first the end of plane 1's blocks, then (below) the end of plane 2's.*/
+  nblocks=(nblocks-4>>1)+4;
+  bi=4;
+  for(pli=1;pli<3;pli++){
+    for(;bi<nblocks;bi++){
+      unsigned best_cost;
+      satd=_frag_satd[bi];
+      best_rate=oc_dct_cost2(&best_ssd,_enc->state.qis[0],pli,_qti,satd)
+       +OC_CHROMA_QII_RATE;
+      best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate,lambda);
+      best_qii=0;
+      for(qii=1;qii<nqis;qii++){
+        /*Use the model for this block's own plane, just as for qii 0 above;
+           passing plane 0 here would compare chroma candidates using the
+           luma model.*/
+        cur_rate=oc_dct_cost2(&cur_ssd,_enc->state.qis[qii],pli,_qti,satd)
+         +OC_CHROMA_QII_RATE;
+        cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate,lambda);
+        if(cur_cost<best_cost){
+          best_cost=cur_cost;
+          best_ssd=cur_ssd;
+          best_rate=cur_rate;
+          best_qii=qii;
+        }
+      }
+      if(_skip_ssd[bi]<UINT_MAX){
+        cur_ssd=_skip_ssd[bi]<<OC_BIT_SCALE;
+        cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate,lambda);
+        if(cur_cost<=best_cost){
+          best_ssd=cur_ssd;
+          best_rate=0;
+          /*Adding 4 to the quantizer index marks the block as skipped.*/
+          best_qii+=4;
+        }
+      }
+      rate+=best_rate;
+      ssd+=best_ssd;
+      _modec->qii[bi]=best_qii;
+    }
+    nblocks=(nblocks-4<<1)+4;
+  }
+  _modec->ssd=ssd;
+  _modec->rate=rate;
+}
+
+/*Computes the skip SSD of a single fragment from the difference between the
+   source and the co-located reference block.
+  For a fragment with a border entry, only the pixels whose bit is set in the
+   border mask are counted.
+  _enc:        The encoder context.
+  _src:        The base of the source frame data.
+  _ref:        The base of the reference frame data.
+  _ystride:    The row stride of the plane the fragment belongs to.
+  _fragi:      The index of the fragment.
+  _dc_dequant: The DC quantizer used for the force-code test.
+  Return: The AC contribution to the SSD, scaled to match the DCT domain, or
+           UINT_MAX if the magnitude of the summed difference exceeds twice
+           _dc_dequant, in which case the block must always be coded.*/
+static unsigned oc_skip_cost_frag(oc_enc_ctx *_enc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,ptrdiff_t _fragi,
+ unsigned _dc_dequant){
+  OC_ALIGN16(ogg_int16_t  buffer[64]);
+  ptrdiff_t               frag_offs;
+  unsigned                uncoded_ssd;
+  int                     uncoded_dc;
+  int                     dc_flag;
+  int                     borderi;
+  int                     pi;
+  frag_offs=_enc->state.frag_buf_offs[_fragi];
+  oc_enc_frag_sub(_enc,buffer,_src+frag_offs,_ref+frag_offs,_ystride);
+  borderi=_enc->state.frags[_fragi].borderi;
+  uncoded_ssd=uncoded_dc=0;
+  if(borderi<0){
+    for(pi=0;pi<64;pi++){
+      uncoded_ssd+=buffer[pi]*buffer[pi];
+      uncoded_dc+=buffer[pi];
+    }
+  }
+  else{
+    ogg_int64_t mask;
+    mask=_enc->state.borders[borderi].mask;
+    for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
+      uncoded_ssd+=buffer[pi]*buffer[pi];
+      uncoded_dc+=buffer[pi];
+    }
+  }
+  /*Scale to match DCT domain.*/
+  uncoded_ssd<<=4;
+  /*We actually only want the AC contribution to the SSD.*/
+  uncoded_ssd-=uncoded_dc*uncoded_dc>>2;
+  /*DC is a special case; if there's more than a full-quantizer improvement
+     in the effective DC component, always force-code the block.
+    OR-ing in -1 turns the result into UINT_MAX.*/
+  dc_flag=abs(uncoded_dc)>_dc_dequant<<1;
+  uncoded_ssd|=-dc_flag;
+  return uncoded_ssd;
+}
+
+/*Computes the skip SSD of every block of a macroblock against the
+   OC_FRAME_PREV reference frame.
+  Each value is stored both in the pipeline's per-plane skip_ssd table and in
+   _ssd.
+  _enc:  The encoder context.
+  _pipe: The pipeline state; its skip_ssd tables are updated.
+  _mbi:  The index of the macroblock.
+  _ssd:  Receives the skip SSDs: the four luma blocks in entries 0..3,
+          followed by the chroma blocks of plane 1 and then plane 2.*/
+static void oc_skip_cost(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe,
+ unsigned _mbi,unsigned _ssd[12]){
+  const unsigned char    *src;
+  const unsigned char    *ref;
+  int                     ystride;
+  const ptrdiff_t        *sb_map;
+  const oc_mb_map_plane  *mb_map;
+  const unsigned char    *map_idxs;
+  int                     map_nidxs;
+  unsigned                uncoded_ssd;
+  unsigned                dc_dequant;
+  int                     mapii;
+  int                     mapi;
+  int                     pli;
+  int                     bi;
+  ptrdiff_t               fragi;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+  /*Luma blocks.*/
+  ystride=_enc->state.ref_ystride[0];
+  sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][0][1][0];
+  for(bi=0;bi<4;bi++){
+    fragi=sb_map[bi];
+    uncoded_ssd=oc_skip_cost_frag(_enc,src,ref,ystride,fragi,dc_dequant);
+    _pipe->skip_ssd[0][fragi-_pipe->froffset[0]]=_ssd[bi]=uncoded_ssd;
+  }
+  /*Chroma blocks.
+    They are split evenly between the two planes: map_nidxs is first the end
+     of plane 1's map indices, then the end of plane 2's.*/
+  mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  map_nidxs=(map_nidxs-4>>1)+4;
+  mapii=4;
+  for(pli=1;pli<3;pli++){
+    ystride=_enc->state.ref_ystride[pli];
+    dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][pli][1][0];
+    for(;mapii<map_nidxs;mapii++){
+      mapi=map_idxs[mapii];
+      bi=mapi&3;
+      fragi=mb_map[pli][bi];
+      uncoded_ssd=oc_skip_cost_frag(_enc,src,ref,ystride,fragi,dc_dequant);
+      _pipe->skip_ssd[pli][fragi-_pipe->froffset[pli]]=_ssd[mapii]=uncoded_ssd;
+    }
+    map_nidxs=(map_nidxs-4<<1)+4;
+  }
+}
+
+/*Measures the intra SATD of every fragment belonging to one macro block.
+  _enc:       The encoder context.
+  _mbi:       The index of the macro block.
+  _frag_satd: Receives one value per fragment.
+              Entries 0...3 are the luma blocks, in super block map order.
+              Entries 4 and up follow OC_MB_MAP_IDXS for the current pixel
+               format.*/
+static void oc_mb_intra_satd(oc_enc_ctx *_enc,unsigned _mbi,
+ unsigned _frag_satd[12]){
+  const unsigned char   *io_data;
+  const ptrdiff_t       *buf_offs;
+  const ptrdiff_t       *luma_fragis;
+  const oc_mb_map_plane *plane_fragis;
+  const unsigned char   *idxs;
+  int                    nidxs;
+  int                    stride;
+  int                    i;
+  io_data=_enc->state.ref_frame_data[OC_FRAME_IO];
+  buf_offs=_enc->state.frag_buf_offs;
+  /*Luma: the four fragment indices come straight from the super block map.*/
+  luma_fragis=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  stride=_enc->state.ref_ystride[0];
+  for(i=0;i<4;i++){
+    _frag_satd[i]=oc_enc_frag_intra_satd(_enc,
+     io_data+buf_offs[luma_fragis[i]],stride);
+  }
+  /*The remaining planes: each map entry packs the plane index in its upper
+     bits and the block index in its low two bits.*/
+  plane_fragis=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*A single stride is used for both planes, which relies on
+     ref_ystride[1]==ref_ystride[2].*/
+  stride=_enc->state.ref_ystride[1];
+  for(i=4;i<nidxs;i++){
+    int       idx;
+    ptrdiff_t fragi;
+    idx=idxs[i];
+    fragi=plane_fragis[idx>>2][idx&3];
+    _frag_satd[i]=oc_enc_frag_intra_satd(_enc,io_data+buf_offs[fragi],stride);
+  }
+}
+
+/*Estimates the cost of coding a macro block in OC_MODE_INTRA.
+  The luma and chroma analysis helpers are run on the supplied SATD and skip
+   SSD values (with 0 as their final argument), the mode scheme cost of
+   OC_MODE_INTRA is added to _modec->overhead (scaled by OC_BIT_SCALE), and
+   oc_mode_set_cost() is then applied with the encoder's current lambda.
+  Note that _mbi is not used by this function.*/
+static void oc_cost_intra(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12]){
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0);
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0);
+  _modec->overhead+=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTRA)<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+
+/*Estimates the cost of coding a macro block in an inter mode that applies a
+   single motion vector to the whole macro block.
+  _enc:      The encoder context.
+  _modec:    Receives the estimate; rate and ssd are reset here before the
+              analysis helpers run.
+  _mbi:      The index of the macro block.
+  _mb_mode:  The mode being evaluated; it selects the reference frame via
+              OC_FRAME_FOR_MODE() and the mode scheme cost.
+  _mv:       The motion vector, as an (x,y) pair.
+  _fr:       Passed through to the analysis helpers.
+  _qs:       Passed through to the analysis helpers.
+  _skip_ssd: The per-fragment skip SSDs, passed through to the helpers.*/
+static void oc_cost_inter(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,const signed char *_mv,
+ const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){
+  unsigned               satds[12];
+  const unsigned char   *cur;
+  const unsigned char   *pred;
+  const ptrdiff_t       *buf_offs;
+  const ptrdiff_t       *luma_fragis;
+  const oc_mb_map_plane *plane_fragis;
+  const unsigned char   *idxs;
+  ptrdiff_t              offs;
+  int                    offsets[2];
+  int                    nidxs;
+  int                    stride;
+  int                    two_refs;
+  int                    mvx;
+  int                    mvy;
+  int                    idx;
+  int                    i;
+  cur=_enc->state.ref_frame_data[OC_FRAME_IO];
+  pred=_enc->state.ref_frame_data[
+   _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(_mb_mode)]];
+  stride=_enc->state.ref_ystride[0];
+  buf_offs=_enc->state.frag_buf_offs;
+  luma_fragis=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  mvx=_mv[0];
+  mvy=_mv[1];
+  _modec->rate=_modec->ssd=0;
+  /*Luma: when the MV yields more than one offset, the two-reference SATD is
+     used; otherwise the single-reference one.*/
+  two_refs=oc_state_get_mv_offsets(&_enc->state,offsets,0,mvx,mvy)>1;
+  for(i=0;i<4;i++){
+    offs=buf_offs[luma_fragis[i]];
+    if(two_refs){
+      satds[i]=oc_enc_frag_satd2_thresh(_enc,cur+offs,
+       pred+offs+offsets[0],pred+offs+offsets[1],stride,UINT_MAX);
+    }
+    else{
+      satds[i]=oc_enc_frag_satd_thresh(_enc,cur+offs,
+       pred+offs+offsets[0],stride,UINT_MAX);
+    }
+  }
+  plane_fragis=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*The remaining planes share the offsets and stride computed for plane 1,
+     which relies on ref_ystride[1]==ref_ystride[2].*/
+  stride=_enc->state.ref_ystride[1];
+  two_refs=oc_state_get_mv_offsets(&_enc->state,offsets,1,mvx,mvy)>1;
+  for(i=4;i<nidxs;i++){
+    idx=idxs[i];
+    offs=buf_offs[plane_fragis[idx>>2][idx&3]];
+    if(two_refs){
+      satds[i]=oc_enc_frag_satd2_thresh(_enc,cur+offs,
+       pred+offs+offsets[0],pred+offs+offsets[1],stride,UINT_MAX);
+    }
+    else{
+      satds[i]=oc_enc_frag_satd_thresh(_enc,cur+offs,
+       pred+offs+offsets[0],stride,UINT_MAX);
+    }
+  }
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,satds,_skip_ssd,1);
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,satds,_skip_ssd,1);
+  /*Charge the bits needed to signal the mode itself.*/
+  _modec->overhead+=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,_mb_mode)<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+
+/*Estimates the cost of an inter mode that carries no motion vector by
+   forwarding to oc_cost_inter() with an all-zero MV.*/
+static void oc_cost_inter_nomv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _skip_ssd[12]){
+  /*Static storage duration guarantees this is zero-initialized.*/
+  static const oc_mv zero_mv;
+  oc_cost_inter(_enc,_modec,_mbi,_mb_mode,zero_mv,_fr,_qs,_skip_ssd);
+}
+
+/*Estimates the cost of an inter mode that codes one explicit motion vector.
+  This is oc_cost_inter() plus the estimated increase in MV bits, after which
+   the cost is recomputed.
+  Return: The cost of the MV looked up in OC_MV_BITS[0] (both components).*/
+static int oc_cost_inter1mv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,const signed char *_mv,
+ const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){
+  int mv_cost0;
+  oc_cost_inter(_enc,_modec,_mbi,_mb_mode,_mv,_fr,_qs,_skip_ssd);
+  /*The table is indexed by the MV component biased by 31.*/
+  mv_cost0=OC_MV_BITS[0][_mv[0]+31]+OC_MV_BITS[0][_mv[1]+31];
+  /*The overhead is how much the cheaper of the two running MV bit totals
+     grows if this MV is added (the second total is charged a flat 12).
+    Subtraction binds tighter than the shift, so it is the difference that
+     gets scaled.*/
+  _modec->overhead+=OC_MINI(_enc->mv_bits[0]+mv_cost0,_enc->mv_bits[1]+12)
+   -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+  return mv_cost0;
+}
+
+/*Converts a block index in raster order (as used by oc_mb_map) into the
+   matching index in Hilbert order (as used by oc_sb_map).
+  The first index is the macro block's position within its super block
+   (callers pass _mbi&3); the second is the raster block index.*/
+static const unsigned char OC_MB_PHASE[4][4]={
+  {0,1,3,2},
+  {0,3,1,2},
+  {0,3,1,2},
+  {2,3,1,0}
+};
+
+/*Estimates the cost of coding a macro block in OC_MODE_INTER_MV_FOUR.
+  _enc:      The encoder context.
+  _modec:    Receives the estimate; rate and ssd are reset here, and overhead
+              is accumulated.
+  _mbi:      The index of the macro block.
+  _mv:       The four candidate luma block MVs, indexed in raster (oc_mb_map)
+              order.
+  _fr:       Passed through to the luma/chroma analysis helpers.
+  _qs:       Passed through to the luma/chroma analysis helpers.
+  _skip_ssd: The per-fragment skip SSDs; OC_NOSKIP is substituted for luma
+              when _enc->vp3_compatible is set.
+  As a side effect, the candidate MVs are written into _enc->state.frag_mvs
+   for the four luma fragments.*/
+static void oc_cost_inter4mv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,oc_mv _mv[4],const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _skip_ssd[12]){
+  unsigned               frag_satd[12];
+  oc_mv                  lbmvs[4];
+  oc_mv                  cbmvs[4];
+  const unsigned char   *src;
+  const unsigned char   *ref;
+  int                    ystride;
+  const ptrdiff_t       *frag_buf_offs;
+  oc_mv                 *frag_mvs;
+  const oc_mb_map_plane *mb_map;
+  const unsigned char   *map_idxs;
+  int                    map_nidxs;
+  int                    nqis;
+  int                    mapii;
+  int                    mapi;
+  int                    mv_offs[2];
+  int                    dx;
+  int                    dy;
+  int                    pli;
+  int                    bi;
+  ptrdiff_t              fragi;
+  ptrdiff_t              frag_offs;
+  int                    bits0;
+  int                    bits1;
+  unsigned               satd;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+  ystride=_enc->state.ref_ystride[0];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  frag_mvs=_enc->state.frag_mvs;
+  mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  _modec->rate=_modec->ssd=0;
+  for(bi=0;bi<4;bi++){
+    fragi=mb_map[0][bi];
+    dx=_mv[bi][0];
+    dy=_mv[bi][1];
+    /*Save the block MVs as the current ones while we're here; we'll replace
+       them if we don't ultimately choose 4MV mode.*/
+    frag_mvs[fragi][0]=(signed char)dx;
+    frag_mvs[fragi][1]=(signed char)dy;
+    frag_offs=frag_buf_offs[fragi];
+    if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){
+      satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+    }
+    else{
+      satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+    }
+    /*bi is a raster index here, but the luma SATDs are stored in Hilbert
+       order, so remap it.*/
+    frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd;
+  }
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,
+   _enc->vp3_compatible?OC_NOSKIP:_skip_ssd,1);
+  /*Figure out which blocks are being skipped and give them (0,0) MVs.*/
+  bits0=0;
+  bits1=0;
+  nqis=_enc->state.nqis;
+  for(bi=0;bi<4;bi++){
+    /*A qii value of nqis or more is treated as marking a skipped block.*/
+    if(_modec->qii[OC_MB_PHASE[_mbi&3][bi]]>=nqis){
+      memset(lbmvs+bi,0,sizeof(*lbmvs));
+    }
+    else{
+      memcpy(lbmvs+bi,_mv+bi,sizeof(*lbmvs));
+      bits0+=OC_MV_BITS[0][_mv[bi][0]+31]+OC_MV_BITS[0][_mv[bi][1]+31];
+      bits1+=12;
+    }
+  }
+  /*Derive the chroma block MVs from the (possibly zeroed) luma block MVs.*/
+  (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(cbmvs,
+   (const oc_mv *)lbmvs);
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+  ystride=_enc->state.ref_ystride[1];
+  for(mapii=4;mapii<map_nidxs;mapii++){
+    mapi=map_idxs[mapii];
+    pli=mapi>>2;
+    bi=mapi&3;
+    fragi=mb_map[pli][bi];
+    dx=cbmvs[bi][0];
+    dy=cbmvs[bi][1];
+    frag_offs=frag_buf_offs[fragi];
+    /*TODO: We could save half these calls by re-using the results for the Cb
+       and Cr planes; is it worth it?*/
+    if(oc_state_get_mv_offsets(&_enc->state,mv_offs,pli,dx,dy)>1){
+      satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+    }
+    else{
+      satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+    }
+    frag_satd[mapii]=satd;
+  }
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1);
+  /*Addition and subtraction bind tighter than the shift, so the mode scheme
+     cost plus the growth in MV bits is scaled as a whole.*/
+  _modec->overhead+=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTER_MV_FOUR)
+   +OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+bits1)
+   -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+
+/*Performs mode decision for an inter frame and codes it, one MCU stripe at a
+   time.
+  For each valid macro block this estimates the cost of every mode, picks the
+   cheapest, propagates the chosen MVs to the fragments, and transforms and
+   quantizes luma; the chroma planes are then coded for the stripe.
+  _enc:            The encoder context.
+  _allow_keyframe: If non-zero, the cost of coding each macro block in intra
+                    mode (with no skipping) is also accumulated, for
+                    comparison against the inter cost.
+  _recode:         If non-zero, the basic motion search and the reset of the
+                    unrefined MVs and refinement flags are skipped.
+  Return: 1 if _allow_keyframe is set and the estimated inter bits exceed the
+           estimated intra bits, or 0 otherwise.*/
+int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode){
+  oc_set_chroma_mvs_func  set_chroma_mvs;
+  oc_enc_pipeline_state   pipe;
+  oc_qii_state            intra_luma_qs;
+  oc_mv                   last_mv;
+  oc_mv                   prior_mv;
+  ogg_int64_t             interbits;
+  ogg_int64_t             intrabits;
+  const unsigned char    *map_idxs;
+  int                     nmap_idxs;
+  unsigned               *coded_mbis;
+  unsigned               *uncoded_mbis;
+  size_t                  ncoded_mbis;
+  size_t                  nuncoded_mbis;
+  oc_sb_flags            *sb_flags;
+  signed char            *mb_modes;
+  const oc_sb_map        *sb_maps;
+  const oc_mb_map        *mb_maps;
+  oc_mb_enc_info         *embs;
+  oc_fragment            *frags;
+  oc_mv                  *frag_mvs;
+  int                     qi;
+  unsigned                stripe_sby;
+  unsigned                mcu_nvsbs;
+  int                     notstart;
+  int                     notdone;
+  int                     vdec;
+  unsigned                sbi;
+  unsigned                sbi_end;
+  int                     refi;
+  int                     pli;
+  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
+  _enc->state.frame_type=OC_INTER_FRAME;
+  oc_mode_scheme_chooser_reset(&_enc->chooser);
+  oc_enc_tokenize_start(_enc);
+  oc_enc_pipeline_init(_enc,&pipe);
+  if(_allow_keyframe)oc_qii_state_init(&intra_luma_qs);
+  _enc->mv_bits[0]=_enc->mv_bits[1]=0;
+  interbits=intrabits=0;
+  last_mv[0]=last_mv[1]=prior_mv[0]=prior_mv[1]=0;
+  /*Choose MVs and MB modes and quantize and code luma.
+    Must be done in Hilbert order.*/
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*NOTE(review): qi is assigned here but never read in this function.*/
+  qi=_enc->state.qis[0];
+  /*Coded MB indices fill the list from the front; uncoded ones are written
+     backwards from the end of the same array.*/
+  coded_mbis=_enc->coded_mbis;
+  uncoded_mbis=coded_mbis+_enc->state.nmbs;
+  ncoded_mbis=0;
+  nuncoded_mbis=0;
+  _enc->state.ncoded_fragis[0]=0;
+  _enc->state.ncoded_fragis[1]=0;
+  _enc->state.ncoded_fragis[2]=0;
+  sb_flags=_enc->state.sb_flags;
+  mb_modes=_enc->state.mb_modes;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+  embs=_enc->mb_info;
+  frags=_enc->state.frags;
+  frag_mvs=_enc->state.frag_mvs;
+  /*NOTE(review): vdec is assigned here but never read in this function.*/
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  notstart=0;
+  notdone=1;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){
+    notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby);
+    sbi_end=pipe.sbi_end[0];
+    for(sbi=pipe.sbi0[0];sbi<sbi_end;sbi++){
+      int quadi;
+      /*Mode addressing is through Y plane, always 4 MB per SB.*/
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
+        oc_mode_choice modes[8];
+        unsigned       skip_ssd[12];
+        unsigned       intra_satd[12];
+        int            mb_mv_bits_0;
+        int            mb_gmv_bits_0;
+        int            inter_mv_pref;
+        int            mb_mode;
+        int            dx;
+        int            dy;
+        unsigned       mbi;
+        int            mapii;
+        int            mapi;
+        int            bi;
+        ptrdiff_t      fragi;
+        mbi=sbi<<2|quadi;
+        /*Motion estimation:
+          We always do a basic 1MV search for all macroblocks, coded or not,
+           keyframe or not.*/
+        if(!_recode)oc_mcenc_search(_enc,mbi);
+        dx=dy=0;
+        /*Find the block choice with the lowest estimated coding cost.
+          If a Cb or Cr block is coded but no Y' block from a macro block then
+           the mode MUST be OC_MODE_INTER_NOMV.
+          This is the default state to which the mode data structure is
+           initialised in encoder and decoder at the start of each frame.*/
+        /*Block coding cost is estimated from correlated SATD metrics.*/
+        /*At this point, all blocks that are in frame are still marked coded.*/
+        if(!_recode){
+          memcpy(embs[mbi].unref_mv,
+           embs[mbi].analysis_mv[0],sizeof(embs[mbi].unref_mv));
+          embs[mbi].refined=0;
+        }
+        oc_mb_intra_satd(_enc,mbi,intra_satd);
+        /*Estimate the cost of coding this MB in a keyframe.*/
+        if(_allow_keyframe){
+          oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
+           pipe.fr+0,&intra_luma_qs,intra_satd,OC_NOSKIP);
+          intrabits+=modes[OC_MODE_INTRA].rate;
+          for(bi=0;bi<4;bi++){
+            oc_qii_state_advance(&intra_luma_qs,&intra_luma_qs,
+             modes[OC_MODE_INTRA].qii[bi]);
+          }
+        }
+        /*Estimate the cost in a delta frame for various modes.*/
+        oc_skip_cost(_enc,&pipe,mbi,skip_ssd);
+        oc_cost_inter_nomv(_enc,modes+OC_MODE_INTER_NOMV,mbi,
+         OC_MODE_INTER_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd);
+        oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
+         pipe.fr+0,pipe.qs+0,intra_satd,skip_ssd);
+        mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
+         OC_MODE_INTER_MV,embs[mbi].unref_mv[OC_FRAME_PREV],
+         pipe.fr+0,pipe.qs+0,skip_ssd);
+        oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST,mbi,
+         OC_MODE_INTER_MV_LAST,last_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
+        oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST2,mbi,
+         OC_MODE_INTER_MV_LAST2,prior_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
+        oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
+         OC_MODE_GOLDEN_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd);
+        mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
+         OC_MODE_GOLDEN_MV,embs[mbi].unref_mv[OC_FRAME_GOLD],
+         pipe.fr+0,pipe.qs+0,skip_ssd);
+        oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
+         embs[mbi].block_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
+        /*The explicit MV modes (2,6,7) have not yet gone through halfpel
+           refinement.
+          We choose the explicit MV mode that's already furthest ahead on bits
+           and refine only that one.
+          We have to be careful to remember which ones we've refined so that
+           we don't refine it again if we re-encode this frame.*/
+        inter_mv_pref=_enc->lambda*3;
+        if(modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_INTER_MV].cost&&
+         modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_GOLDEN_MV].cost){
+          if(!(embs[mbi].refined&0x80)){
+            oc_mcenc_refine4mv(_enc,mbi);
+            embs[mbi].refined|=0x80;
+          }
+          oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
+           embs[mbi].ref_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
+        }
+        else if(modes[OC_MODE_GOLDEN_MV].cost+inter_mv_pref<
+         modes[OC_MODE_INTER_MV].cost){
+          if(!(embs[mbi].refined&0x40)){
+            oc_mcenc_refine1mv(_enc,mbi,OC_FRAME_GOLD);
+            embs[mbi].refined|=0x40;
+          }
+          mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
+           OC_MODE_GOLDEN_MV,embs[mbi].analysis_mv[0][OC_FRAME_GOLD],
+           pipe.fr+0,pipe.qs+0,skip_ssd);
+        }
+        /*OC_MODE_INTER_MV is refined (once) and re-costed regardless of
+           which branch above was taken.*/
+        if(!(embs[mbi].refined&0x04)){
+          oc_mcenc_refine1mv(_enc,mbi,OC_FRAME_PREV);
+          embs[mbi].refined|=0x04;
+        }
+        mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
+         OC_MODE_INTER_MV,embs[mbi].analysis_mv[0][OC_FRAME_PREV],
+         pipe.fr+0,pipe.qs+0,skip_ssd);
+        /*Finally, pick the mode with the cheapest estimated R-D cost.*/
+        mb_mode=0;
+        if(modes[1].cost<modes[0].cost)mb_mode=1;
+        if(modes[3].cost<modes[mb_mode].cost)mb_mode=3;
+        if(modes[4].cost<modes[mb_mode].cost)mb_mode=4;
+        if(modes[5].cost<modes[mb_mode].cost)mb_mode=5;
+        if(modes[6].cost<modes[mb_mode].cost)mb_mode=6;
+        if(modes[7].cost<modes[mb_mode].cost)mb_mode=7;
+        /*We prefer OC_MODE_INTER_MV, but not over LAST and LAST2.*/
+        if(mb_mode==OC_MODE_INTER_MV_LAST||mb_mode==OC_MODE_INTER_MV_LAST2){
+          inter_mv_pref=0;
+        }
+        if(modes[2].cost<modes[mb_mode].cost+inter_mv_pref)mb_mode=2;
+        mb_modes[mbi]=mb_mode;
+        /*Propagate the MVs to the luma blocks.*/
+        if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+          /*Modes without a case below keep the (0,0) MV set earlier.*/
+          switch(mb_mode){
+            case OC_MODE_INTER_MV:{
+              dx=embs[mbi].analysis_mv[0][OC_FRAME_PREV][0];
+              dy=embs[mbi].analysis_mv[0][OC_FRAME_PREV][1];
+            }break;
+            case OC_MODE_INTER_MV_LAST:{
+              dx=last_mv[0];
+              dy=last_mv[1];
+            }break;
+            case OC_MODE_INTER_MV_LAST2:{
+              dx=prior_mv[0];
+              dy=prior_mv[1];
+            }break;
+            case OC_MODE_GOLDEN_MV:{
+              dx=embs[mbi].analysis_mv[0][OC_FRAME_GOLD][0];
+              dy=embs[mbi].analysis_mv[0][OC_FRAME_GOLD][1];
+            }break;
+          }
+          for(bi=0;bi<4;bi++){
+            fragi=mb_maps[mbi][0][bi];
+            frag_mvs[fragi][0]=(signed char)dx;
+            frag_mvs[fragi][1]=(signed char)dy;
+          }
+        }
+        for(bi=0;bi<4;bi++){
+          fragi=sb_maps[mbi>>2][mbi&3][bi];
+          frags[fragi].qii=modes[mb_mode].qii[bi];
+        }
+        if(oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,
+         modes[mb_mode].overhead>>OC_BIT_SCALE)>0){
+          int orig_mb_mode;
+          orig_mb_mode=mb_mode;
+          /*Re-read the mode: it is compared against the original below to
+             detect a switch away from 4MV.*/
+          mb_mode=mb_modes[mbi];
+          switch(mb_mode){
+            case OC_MODE_INTER_MV:{
+              memcpy(prior_mv,last_mv,sizeof(prior_mv));
+              /*If we're backing out from 4MV, find the MV we're actually
+                 using.*/
+              if(orig_mb_mode==OC_MODE_INTER_MV_FOUR){
+                /*This loop has no bound of its own; it relies on at least
+                   one luma fragment being marked coded.*/
+                for(bi=0;;bi++){
+                  fragi=mb_maps[mbi][0][bi];
+                  if(frags[fragi].coded){
+                    memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
+                    dx=frag_mvs[fragi][0];
+                    dy=frag_mvs[fragi][1];
+                    break;
+                  }
+                }
+                mb_mv_bits_0=OC_MV_BITS[0][dx+31]+OC_MV_BITS[0][dy+31];
+              }
+              /*Otherwise we used the original analysis MV.*/
+              else{
+                memcpy(last_mv,
+                 embs[mbi].analysis_mv[0][OC_FRAME_PREV],sizeof(last_mv));
+              }
+              _enc->mv_bits[0]+=mb_mv_bits_0;
+              _enc->mv_bits[1]+=12;
+            }break;
+            case OC_MODE_INTER_MV_LAST2:{
+              /*Swap last_mv and prior_mv.*/
+              oc_mv tmp_mv;
+              memcpy(tmp_mv,prior_mv,sizeof(tmp_mv));
+              memcpy(prior_mv,last_mv,sizeof(prior_mv));
+              memcpy(last_mv,tmp_mv,sizeof(last_mv));
+            }break;
+            case OC_MODE_GOLDEN_MV:{
+              _enc->mv_bits[0]+=mb_gmv_bits_0;
+              _enc->mv_bits[1]+=12;
+            }break;
+            case OC_MODE_INTER_MV_FOUR:{
+              oc_mv lbmvs[4];
+              oc_mv cbmvs[4];
+              memcpy(prior_mv,last_mv,sizeof(prior_mv));
+              for(bi=0;bi<4;bi++){
+                fragi=mb_maps[mbi][0][bi];
+                if(frags[fragi].coded){
+                  memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
+                  memcpy(lbmvs[bi],frag_mvs[fragi],sizeof(lbmvs[bi]));
+                  _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi][0]+31]
+                   +OC_MV_BITS[0][frag_mvs[fragi][1]+31];
+                  _enc->mv_bits[1]+=12;
+                }
+                /*Replace the block MVs for not-coded blocks with (0,0).*/
+                else memset(lbmvs[bi],0,sizeof(lbmvs[bi]));
+              }
+              (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
+              for(mapii=4;mapii<nmap_idxs;mapii++){
+                mapi=map_idxs[mapii];
+                pli=mapi>>2;
+                bi=mapi&3;
+                fragi=mb_maps[mbi][pli][bi];
+                frags[fragi].mb_mode=mb_mode;
+                frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii];
+                memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(frag_mvs[fragi]));
+              }
+            }break;
+          }
+          coded_mbis[ncoded_mbis++]=mbi;
+          oc_mode_scheme_chooser_update(&_enc->chooser,mb_mode);
+          interbits+=modes[mb_mode].rate+modes[mb_mode].overhead;
+        }
+        else{
+          /*No luma block was coded: record the MB at the back of the list
+             and fall back to OC_MODE_INTER_NOMV with a (0,0) MV.*/
+          *(uncoded_mbis-++nuncoded_mbis)=mbi;
+          mb_mode=OC_MODE_INTER_NOMV;
+          dx=dy=0;
+        }
+        /*Propagate final MB mode and MVs to the chroma blocks.
+          This has already been done for 4MV mode, since it requires individual
+           block motion vectors.*/
+        if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+          for(mapii=4;mapii<nmap_idxs;mapii++){
+            mapi=map_idxs[mapii];
+            pli=mapi>>2;
+            bi=mapi&3;
+            fragi=mb_maps[mbi][pli][bi];
+            frags[fragi].mb_mode=mb_mode;
+            /*If we switched from 4MV mode to INTER_MV mode, then the qii
+               values won't have been chosen with the right MV, but it's
+               probably not worth re-estimating them.*/
+            frags[fragi].qii=modes[mb_mode].qii[mapii];
+            frag_mvs[fragi][0]=(signed char)dx;
+            frag_mvs[fragi][1]=(signed char)dy;
+          }
+        }
+      }
+      oc_fr_state_flush_sb(pipe.fr+0);
+      sb_flags[sbi].coded_fully=pipe.fr[0].sb_full;
+      sb_flags[sbi].coded_partially=pipe.fr[0].sb_partial;
+    }
+    oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone);
+    /*Code chroma planes.*/
+    for(pli=1;pli<3;pli++){
+      oc_enc_sb_transform_quantize_chroma(_enc,&pipe,
+       pli,pipe.sbi0[pli],pipe.sbi_end[pli]);
+      oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone);
+    }
+    notstart=1;
+  }
+  /*Finish filling in the reference frame borders.*/
+  refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+  for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli);
+  /*Finish adding flagging overhead costs to inter bit counts to determine if
+     we should have coded a key frame instead.*/
+  if(_allow_keyframe){
+    /*Early out: the remaining terms only add to interbits.*/
+    if(interbits>intrabits)return 1;
+    /*Technically the chroma plane counts are over-estimations, because they
+       don't account for continuing runs from the luma planes, but the
+       inaccuracy is small.*/
+    for(pli=0;pli<3;pli++)interbits+=pipe.fr[pli].bits<<OC_BIT_SCALE;
+    interbits+=OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE;
+    interbits+=
+     _enc->chooser.scheme_bits[_enc->chooser.scheme_list[0]]<<OC_BIT_SCALE;
+    if(interbits>intrabits)return 1;
+  }
+  _enc->ncoded_mbis=ncoded_mbis;
+  /*Compact the coded fragment list.*/
+  {
+    ptrdiff_t ncoded_fragis;
+    ncoded_fragis=_enc->state.ncoded_fragis[0];
+    for(pli=1;pli<3;pli++){
+      /*memmove() is used because the source and destination ranges lie in
+         the same array.*/
+      memmove(_enc->state.coded_fragis+ncoded_fragis,
+       _enc->state.coded_fragis+_enc->state.fplanes[pli].froffset,
+       _enc->state.ncoded_fragis[pli]*sizeof(*_enc->state.coded_fragis));
+      ncoded_fragis+=_enc->state.ncoded_fragis[pli];
+    }
+    _enc->state.ntotal_coded_fragis=ncoded_fragis;
+  }
+  return 0;
+}
+
+#if defined(OC_COLLECT_METRICS)
+# include <stdio.h>
+# include <math.h>
+
+/*TODO: It may be helpful (for block-level quantizers especially) to separate
+   out the contributions from AC and DC into separate tables.*/
+
+/*The total fragment weight that oc_enc_mode_metrics_update() tries to gather
+   from neighboring bins before fitting a line for a bin.*/
+# define OC_ZWEIGHT   (0.25)
+
+/*Folds one weighted sample into a set of running statistics.
+  The second-order sums are updated from the sample's deviation from the
+   current weighted means before the first-order sums are advanced.
+  _metrics: The statistics to update.
+  _w:       The weight of the sample.
+  _satd:    The SATD of the sample.
+  _rate:    The rate of the sample, scaled by OC_BIT_SCALE bits.
+  _rmse:    The RMSE of the sample.*/
+static void oc_mode_metrics_add(oc_mode_metrics *_metrics,
+ double _w,int _satd,int _rate,double _rmse){
+  double bits;
+  double satd_dev;
+  double bits_dev;
+  double rmse_dev;
+  double pair_w;
+  /*Statistics are kept without the fixed-point scaling, so that the scale
+     factor can change while old data remains usable.*/
+  bits=ldexp(_rate,-OC_BIT_SCALE);
+  /*With no prior weight there are no means to deviate from yet.*/
+  if(_metrics->fragw>0){
+    satd_dev=_satd-_metrics->satd/_metrics->fragw;
+    bits_dev=bits-_metrics->rate/_metrics->fragw;
+    rmse_dev=_rmse-_metrics->rmse/_metrics->fragw;
+    pair_w=_metrics->fragw*_w/(_metrics->fragw+_w);
+    _metrics->satd2+=satd_dev*satd_dev*pair_w;
+    _metrics->satdrate+=satd_dev*bits_dev*pair_w;
+    _metrics->rate2+=bits_dev*bits_dev*pair_w;
+    _metrics->satdrmse+=satd_dev*rmse_dev*pair_w;
+    _metrics->rmse2+=rmse_dev*rmse_dev*pair_w;
+  }
+  _metrics->fragw+=_w;
+  _metrics->satd+=_satd*_w;
+  _metrics->rate+=bits*_w;
+  _metrics->rmse+=_rmse*_w;
+}
+
+/*Combines several sets of statistics into one.
+  Sets whose weight is not positive are ignored; if none remain, the result
+   is cleared to all zeros.
+  _dst: Receives the combined statistics.
+  _src: The sets to combine.
+  _n:   The number of sets in _src.*/
+static void oc_mode_metrics_merge(oc_mode_metrics *_dst,
+ const oc_mode_metrics *_src,int _n){
+  int first;
+  int i;
+  /*Locate the first set that actually holds data.*/
+  first=0;
+  while(first<_n&&_src[first].fragw<=0)first++;
+  if(first>=_n){
+    memset(_dst,0,sizeof(*_dst));
+    return;
+  }
+  memcpy(_dst,_src+first,sizeof(*_dst));
+  /*Then fold in every later set that holds data.*/
+  for(i=first+1;i<_n;i++){
+    const oc_mode_metrics *cur;
+    double                 dst_w;
+    double                 cur_w;
+    double                 satd_dev;
+    double                 rate_dev;
+    double                 rmse_dev;
+    double                 pair_w;
+    cur=_src+i;
+    if(!(cur->fragw>0))continue;
+    dst_w=_dst->fragw;
+    cur_w=cur->fragw;
+    /*The differences between the two sets' weighted means.*/
+    satd_dev=cur->satd/cur_w-_dst->satd/dst_w;
+    rate_dev=cur->rate/cur_w-_dst->rate/dst_w;
+    rmse_dev=cur->rmse/cur_w-_dst->rmse/dst_w;
+    pair_w=dst_w*cur_w/(dst_w+cur_w);
+    _dst->fragw+=cur->fragw;
+    _dst->satd+=cur->satd;
+    _dst->rate+=cur->rate;
+    _dst->rmse+=cur->rmse;
+    _dst->satd2+=cur->satd2+satd_dev*satd_dev*pair_w;
+    _dst->satdrate+=cur->satdrate+satd_dev*rate_dev*pair_w;
+    _dst->rate2+=cur->rate2+rate_dev*rate_dev*pair_w;
+    _dst->satdrmse+=cur->satdrmse+satd_dev*rmse_dev*pair_w;
+    _dst->rmse2+=cur->rmse2+rmse_dev*rmse_dev*pair_w;
+  }
+}
+
+/*Compile collected SATD/rate/RMSE metrics into a form that's immediately
+   useful for mode decision.
+  For every plane, quantizer type, and SAD bin, the raw OC_MODE_METRICS of a
+   window of neighboring bins are merged, lines of rate vs. SATD and RMSE vs.
+   SATD are fit to them, and the lines are evaluated at the bin's SATD value
+   to fill in OC_MODE_RD.
+  _enc: The encoder context.
+  _qi:  The quantizer index whose tables are updated.*/
+static void oc_enc_mode_metrics_update(oc_enc_ctx *_enc,int _qi){
+  int pli;
+  int qti;
+  /*NOTE(review): presumably needed before the floating-point math below;
+     confirm against oc_restore_fpu().*/
+  oc_restore_fpu(&_enc->state);
+  /*Convert raw collected data into cleaned up sample points.*/
+  for(pli=0;pli<3;pli++){
+    for(qti=0;qti<2;qti++){
+      double fragw;
+      int    bin0;
+      int    bin1;
+      int    bin;
+      /*[bin0,bin1) is the window of bins being merged, and fragw is the total
+         weight inside it; both carry over from one bin to the next.*/
+      fragw=0;
+      bin0=bin1=0;
+      for(bin=0;bin<OC_SAD_BINS;bin++){
+        oc_mode_metrics metrics;
+        /*Default to zero in case no line can be fit below.*/
+        OC_MODE_RD[_qi][pli][qti][bin].rate=0;
+        OC_MODE_RD[_qi][pli][qti][bin].rmse=0;
+        /*Find some points on either side of the current bin.*/
+        /*NOTE(review): bin1 is capped at OC_SAD_BINS-1 and is used as an
+           exclusive bound in the merge below, so the last bin's raw metrics
+           appear never to be included; confirm this is intended.*/
+        while((bin1<bin+1||fragw<OC_ZWEIGHT)&&bin1<OC_SAD_BINS-1){
+          fragw+=OC_MODE_METRICS[_qi][pli][qti][bin1++].fragw;
+        }
+        /*Drop bins from the low end while enough weight remains.*/
+        while(bin0+1<bin&&bin0+1<bin1&&
+         fragw-OC_MODE_METRICS[_qi][pli][qti][bin0].fragw>=OC_ZWEIGHT){
+          fragw-=OC_MODE_METRICS[_qi][pli][qti][bin0++].fragw;
+        }
+        /*Merge statistics and fit lines.*/
+        oc_mode_metrics_merge(&metrics,
+         OC_MODE_METRICS[_qi][pli][qti]+bin0,bin1-bin0);
+        /*satd2 is used as a divisor below, so it must be positive.*/
+        if(metrics.fragw>0&&metrics.satd2>0){
+          double a;
+          double b;
+          double msatd;
+          double mrate;
+          double mrmse;
+          double rate;
+          double rmse;
+          msatd=metrics.satd/metrics.fragw;
+          mrate=metrics.rate/metrics.fragw;
+          mrmse=metrics.rmse/metrics.fragw;
+          /*Compute the points on these lines corresponding to the actual bin
+             value.*/
+          /*b is the slope and a the intercept of the fitted line.*/
+          b=metrics.satdrate/metrics.satd2;
+          a=mrate-b*msatd;
+          rate=ldexp(a+b*(bin<<OC_SAD_SHIFT),OC_BIT_SCALE);
+          /*Round, then clamp to the range of a 16-bit signed integer.*/
+          OC_MODE_RD[_qi][pli][qti][bin].rate=
+           (ogg_int16_t)OC_CLAMPI(-32768,(int)(rate+0.5),32767);
+          b=metrics.satdrmse/metrics.satd2;
+          a=mrmse-b*msatd;
+          rmse=ldexp(a+b*(bin<<OC_SAD_SHIFT),OC_RMSE_SCALE);
+          OC_MODE_RD[_qi][pli][qti][bin].rmse=
+           (ogg_int16_t)OC_CLAMPI(-32768,(int)(rmse+0.5),32767);
+        }
+      }
+    }
+  }
+}
+
+
+
+/*The following token skipping code used to also be used in the decoder (and
+   even at one point other places in the encoder).
+  However, it was obsoleted by other optimizations, and is now only used here.
+  It has been moved here to avoid generating the code when it's not needed.*/
+
+/*The signature of a token skip handler, as stored in OC_TOKEN_SKIP_TABLE.
+  A handler determines the number of blocks or coefficients to be skipped for
+   a given token value.
+  _token:      The token value to skip.
+  _extra_bits: The extra bits attached to this token.
+  Return: A positive value indicates that number of coefficients are to be
+           skipped in the current block.
+          Otherwise, the negative of the return value indicates that number of
+           blocks are to be ended.*/
+typedef ptrdiff_t (*oc_token_skip_func)(int _token,int _extra_bits);
+
+/*Skip handler for the simple end of block tokens.
+  The run length is the extra bits plus a per-token base looked up from
+   _token; the total is returned negated.*/
+static ptrdiff_t oc_token_skip_eob(int _token,int _extra_bits){
+  int base_run;
+  base_run=OC_UNIBBLE_TABLE32(0,1,2,3,7,15,0,0,_token)+1;
+  return -_extra_bits-base_run;
+}
+
+/*The last EOB token has a special case, where an EOB run of size zero ends all
+   the remaining blocks in the frame.
+  _token:      The token value (not consulted).
+  _extra_bits: The extra bits attached to this token, i.e., the run length.
+  Return: The negative of the number of blocks to end, or the most negative
+           count representable here if the run length is zero.*/
+static ptrdiff_t oc_token_skip_eob6(int _token,int _extra_bits){
+  /*Note: We want to return -PTRDIFF_MAX, but that requires C99, which is not
+     yet available everywhere; this should be equivalent.
+    The cast to ptrdiff_t must come before the negation: negating the size_t
+     value directly wraps around to a large unsigned number, and converting
+     that back to a signed type is implementation-defined.*/
+  if(!_extra_bits)return -(ptrdiff_t)(~(size_t)0>>1);
+  return -_extra_bits;
+}
+
+/*Skip handler for the pure zero run tokens.
+  The count returned is one more than the value of the extra bits; _token is
+   not consulted.*/
+static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){
+  int ncoeffs;
+  ncoeffs=_extra_bits+1;
+  return ncoeffs;
+}
+
+/*Handles a normal coefficient value token.
+  Neither argument is used; both are present so that the function has exactly
+   the oc_token_skip_func signature it is called through.
+  Return: Always 1.*/
+static ptrdiff_t oc_token_skip_val(int _token,int _extra_bits){
+  return 1;
+}
+
+/*Handles a category 1A zero run/coefficient value combo token.
+  _token:      The token value; its offset from OC_DCT_RUN_CAT1A determines
+                the result.
+  _extra_bits: Unused; present only to match oc_token_skip_func.
+  Return: _token-OC_DCT_RUN_CAT1A+2.*/
+static ptrdiff_t oc_token_skip_run_cat1a(int _token,int _extra_bits){
+  return _token-OC_DCT_RUN_CAT1A+2;
+}
+
+/*Handles category 1b, 1c, 2a, and 2b zero run/coefficient value combo tokens.
+  _token:      The token value; its offset from OC_DCT_RUN_CAT1B selects the
+                mask and adjustment to use.
+  _extra_bits: The extra bits attached to the token; only the bits kept by the
+                selected mask contribute to the result.
+  Return: The masked extra bits plus the selected adjustment.*/
+static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){
+  int run_cati;
+  int ncoeffs_mask;
+  int ncoeffs_adjust;
+  run_cati=_token-OC_DCT_RUN_CAT1B;
+  ncoeffs_mask=OC_BYTE_TABLE32(3,7,0,1,run_cati);
+  ncoeffs_adjust=OC_BYTE_TABLE32(7,11,2,3,run_cati);
+  return (_extra_bits&ncoeffs_mask)+ncoeffs_adjust;
+}
+
+/*A jump table for computing the number of coefficients or blocks to skip for
+   a given token value.
+  This reduces all the conditional branches, etc., needed to parse these token
+   values down to one indirect jump.
+  Every handler is declared with the full oc_token_skip_func signature, even
+   when it ignores an argument, so that no entry needs a cast: calling a
+   function through a pointer to an incompatible function type is undefined
+   behavior.*/
+static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob6,
+  oc_token_skip_zrl,
+  oc_token_skip_zrl,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_val,
+  oc_token_skip_run_cat1a,
+  oc_token_skip_run_cat1a,
+  oc_token_skip_run_cat1a,
+  oc_token_skip_run_cat1a,
+  oc_token_skip_run_cat1a,
+  oc_token_skip_run,
+  oc_token_skip_run,
+  oc_token_skip_run,
+  oc_token_skip_run
+};
+
+/*Looks up how far a token advances the current position, by dispatching to
+   the handler registered for it in OC_TOKEN_SKIP_TABLE.
+  _token:      The token value to skip.
+  _extra_bits: The extra bits attached to this token.
+  Return: A positive value is the number of coefficients to skip in the
+           current block.
+          Otherwise, the negative of the return value is the number of blocks
+           to be ended.
+          0 will never be returned, so that at least one coefficient in one
+           block will always be decoded for every token.*/
+static ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){
+  oc_token_skip_func skip_func;
+  skip_func=OC_TOKEN_SKIP_TABLE[_token];
+  return (*skip_func)(_token,_extra_bits);
+}
+
+
+
+/*Accumulates rate and distortion statistics from the encoder's current token
+   lists.
+  On the first call, any statistics saved in "modedec.stats" are loaded and
+   OC_MODE_RD is rebuilt for every qi.
+  For each coded fragment, the bits spent on its DCT tokens are totalled and
+   added, along with its SATD and the square root of its SSD, to the
+   OC_MODE_METRICS bin selected by its qi, plane, intra/inter mode and SATD.
+  Finally, OC_MODE_RD is refreshed for each qi in use.
+  _enc: The encoder context to collect statistics from.*/
+void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc){
+  static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
+     0,16,16,16,16,16,32,32,
+    32,32,32,32,32,32,32,48,
+    48,48,48,48,48,48,48,48,
+    48,48,48,48,64,64,64,64,
+    64,64,64,64,64,64,64,64,
+    64,64,64,64,64,64,64,64,
+    64,64,64,64,64,64,64,64
+  };
+  const oc_fragment *frags;
+  const unsigned    *frag_satd;
+  const unsigned    *frag_ssd;
+  const ptrdiff_t   *coded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          fragii;
+  double             fragw;
+  int                qti;
+  int                qii;
+  int                qi;
+  int                pli;
+  int                zzi;
+  int                token;
+  int                eb;
+  oc_restore_fpu(&_enc->state);
+  /*Load any existing mode metrics if we haven't already.*/
+  if(!oc_has_mode_metrics){
+    FILE *fmetrics;
+    memset(OC_MODE_METRICS,0,sizeof(OC_MODE_METRICS));
+    fmetrics=fopen("modedec.stats","rb");
+    if(fmetrics!=NULL){
+      /*A short or failed read can leave the table partially overwritten;
+         go back to empty statistics rather than mix the two.*/
+      if(fread(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics)<1){
+        memset(OC_MODE_METRICS,0,sizeof(OC_MODE_METRICS));
+      }
+      fclose(fmetrics);
+    }
+    for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi);
+    oc_has_mode_metrics=1;
+  }
+  qti=_enc->state.frame_type;
+  frags=_enc->state.frags;
+  frag_satd=_enc->frag_satd;
+  frag_ssd=_enc->frag_ssd;
+  coded_fragis=_enc->state.coded_fragis;
+  ncoded_fragis=fragii=0;
+  /*Weight the fragments by the inverse frame size; this prevents HD content
+     from dominating the statistics.*/
+  fragw=1.0/_enc->state.nfrags;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t ti[64];
+    int       eob_token[64];
+    /*Run lengths are kept as ptrdiff_t, the type oc_dct_token_skip() returns,
+       so that its "end all remaining blocks" value is not truncated.*/
+    ptrdiff_t eob_run[64];
+    /*Set up token indices and eob run counts.
+      We don't bother trying to figure out the real cost of the runs that span
+       coefficients; instead we use the costs that were available when R-D
+       token optimization was done.*/
+    for(zzi=0;zzi<64;zzi++){
+      ti[zzi]=_enc->dct_token_offs[pli][zzi];
+      if(ti[zzi]>0){
+        token=_enc->dct_tokens[pli][zzi][0];
+        eb=_enc->extra_bits[pli][zzi][0];
+        eob_token[zzi]=token;
+        eob_run[zzi]=-oc_dct_token_skip(token,eb);
+      }
+      else{
+        eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
+        eob_run[zzi]=0;
+      }
+    }
+    /*Scan the list of coded fragments for this plane.*/
+    ncoded_fragis+=_enc->state.ncoded_fragis[pli];
+    for(;fragii<ncoded_fragis;fragii++){
+      ptrdiff_t    fragi;
+      ogg_uint32_t frag_bits;
+      int          huffi;
+      ptrdiff_t    skip;
+      int          mb_mode;
+      unsigned     satd;
+      int          bin;
+      fragi=coded_fragis[fragii];
+      frag_bits=0;
+      for(zzi=0;zzi<64;){
+        if(eob_run[zzi]>0){
+          /*We've reached the end of the block.*/
+          eob_run[zzi]--;
+          break;
+        }
+        huffi=_enc->huff_idxs[qti][zzi>0][pli+1>>1]
+         +OC_ZZI_HUFF_OFFSET[zzi];
+        if(eob_token[zzi]<OC_NDCT_EOB_TOKEN_MAX){
+          /*This token caused an EOB run to be flushed.
+            Therefore it gets the bits associated with it.*/
+          frag_bits+=_enc->huff_codes[huffi][eob_token[zzi]].nbits
+           +OC_DCT_TOKEN_EXTRA_BITS[eob_token[zzi]];
+          eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
+        }
+        token=_enc->dct_tokens[pli][zzi][ti[zzi]];
+        eb=_enc->extra_bits[pli][zzi][ti[zzi]];
+        ti[zzi]++;
+        skip=oc_dct_token_skip(token,eb);
+        if(skip<0){
+          eob_token[zzi]=token;
+          eob_run[zzi]=-skip;
+        }
+        else{
+          /*A regular DCT value token; accumulate the bits for it.*/
+          frag_bits+=_enc->huff_codes[huffi][token].nbits
+           +OC_DCT_TOKEN_EXTRA_BITS[token];
+          zzi+=(int)skip;
+        }
+      }
+      mb_mode=frags[fragi].mb_mode;
+      qi=_enc->state.qis[frags[fragi].qii];
+      satd=frag_satd[fragi]<<(pli+1&2);
+      bin=OC_MINI(satd>>OC_SAD_SHIFT,OC_SAD_BINS-1);
+      oc_mode_metrics_add(OC_MODE_METRICS[qi][pli][mb_mode!=OC_MODE_INTRA]+bin,
+       fragw,satd,frag_bits<<OC_BIT_SCALE,sqrt(frag_ssd[fragi]));
+    }
+  }
+  /*Update global SATD/rate/RMSE estimation matrix.*/
+  for(qii=0;qii<_enc->state.nqis;qii++){
+    oc_enc_mode_metrics_update(_enc,_enc->state.qis[qii]);
+  }
+}
+
+/*Saves the collected statistics and prints the table derived from them.
+  OC_MODE_RD is first rebuilt for every qi, then the raw OC_MODE_METRICS array
+   is written to "modedec.stats" (silently skipped if the file cannot be
+   opened), and finally the text of a C header that defines OC_MODE_RD is
+   printed to stdout.
+  _enc: The encoder context, passed through to oc_enc_mode_metrics_update().*/
+void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc){
+  FILE *fmetrics;
+  int   qi;
+  /*Generate sample points for complete list of QI values.*/
+  for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi);
+  fmetrics=fopen("modedec.stats","wb");
+  if(fmetrics!=NULL){
+    fwrite(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics);
+    fclose(fmetrics);
+  }
+  /*Print the fixed part of the header, with the current values of the scale
+     and bin constants substituted in.*/
+  fprintf(stdout,
+   "/*File generated by libtheora with OC_COLLECT_METRICS"
+   " defined at compile time.*/\n"
+   "#if !defined(_modedec_H)\n"
+   "# define _modedec_H (1)\n"
+   "\n"
+   "\n"
+   "\n"
+   "# if defined(OC_COLLECT_METRICS)\n"
+   "typedef struct oc_mode_metrics oc_mode_metrics;\n"
+   "# endif\n"
+   "typedef struct oc_mode_rd      oc_mode_rd;\n"
+   "\n"
+   "\n"
+   "\n"
+   "/*The number of extra bits of precision at which to store rate"
+   " metrics.*/\n"
+   "# define OC_BIT_SCALE  (%i)\n"
+   "/*The number of extra bits of precision at which to store RMSE metrics.\n"
+   "  This must be at least half OC_BIT_SCALE (rounded up).*/\n"
+   "# define OC_RMSE_SCALE (%i)\n"
+   "/*The number of bins to partition statistics into.*/\n"
+   "# define OC_SAD_BINS   (%i)\n"
+   "/*The number of bits of precision to drop"
+   " from SAD scores to assign them to a\n"
+   "   bin.*/\n"
+   "# define OC_SAD_SHIFT  (%i)\n"
+   "\n"
+   "\n"
+   "\n"
+   "# if defined(OC_COLLECT_METRICS)\n"
+   "struct oc_mode_metrics{\n"
+   "  double fragw;\n"
+   "  double satd;\n"
+   "  double rate;\n"
+   "  double rmse;\n"
+   "  double satd2;\n"
+   "  double satdrate;\n"
+   "  double rate2;\n"
+   "  double satdrmse;\n"
+   "  double rmse2;\n"
+   "};\n"
+   "\n"
+   "\n"
+   "int             oc_has_mode_metrics;\n"
+   "oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS];\n"
+   "# endif\n"
+   "\n"
+   "\n"
+   "\n"
+   "struct oc_mode_rd{\n"
+   "  ogg_int16_t rate;\n"
+   "  ogg_int16_t rmse;\n"
+   "};\n"
+   "\n"
+   "\n"
+   "# if !defined(OC_COLLECT_METRICS)\n"
+   "static const\n"
+   "# endif\n"
+   "oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={\n",
+   OC_BIT_SCALE,OC_RMSE_SCALE,OC_SAD_BINS,OC_SAD_SHIFT);
+  /*Print the initializer itself: one nested brace level per array dimension,
+     with a comma after every element except the last at each level.*/
+  for(qi=0;qi<64;qi++){
+    int pli;
+    fprintf(stdout,"  {\n");
+    for(pli=0;pli<3;pli++){
+      int qti;
+      fprintf(stdout,"    {\n");
+      for(qti=0;qti<2;qti++){
+        int bin;
+        static const char *pl_names[3]={"Y'","Cb","Cr"};
+        static const char *qti_names[2]={"INTRA","INTER"};
+        fprintf(stdout,"      /*%s  qi=%i  %s*/\n",
+         pl_names[pli],qi,qti_names[qti]);
+        fprintf(stdout,"      {\n");
+        fprintf(stdout,"        ");
+        for(bin=0;bin<OC_SAD_BINS;bin++){
+          /*Start a new output line after every fourth entry.*/
+          if(bin&&!(bin&0x3))fprintf(stdout,"\n        ");
+          fprintf(stdout,"{%5i,%5i}",
+           OC_MODE_RD[qi][pli][qti][bin].rate,
+           OC_MODE_RD[qi][pli][qti][bin].rmse);
+          if(bin+1<OC_SAD_BINS)fprintf(stdout,",");
+        }
+        fprintf(stdout,"\n      }");
+        if(qti<1)fprintf(stdout,",");
+        fprintf(stdout,"\n");
+      }
+      fprintf(stdout,"    }");
+      if(pli<2)fprintf(stdout,",");
+      fprintf(stdout,"\n");
+    }
+    fprintf(stdout,"  }");
+    if(qi<63)fprintf(stdout,",");
+    fprintf(stdout,"\n");
+  }
+  fprintf(stdout,
+   "};\n"
+   "\n"
+   "#endif\n");
+}
+#endif

Copied: trunk/theora/lib/apiwrapper.c (from rev 16442, trunk/theora/lib/dec/apiwrapper.c)
===================================================================
--- trunk/theora/lib/apiwrapper.c	                        (rev 0)
+++ trunk/theora/lib/apiwrapper.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,166 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+
+
+
+/*Legacy API entry point; forwards to th_version_string().*/
+const char *theora_version_string(void){
+  return th_version_string();
+}
+
+/*Legacy API entry point; forwards to th_version_number().*/
+ogg_uint32_t theora_version_number(void){
+  return th_version_number();
+}
+
+/*Initializes a theora_info struct by setting every byte of it to zero.
+  _ci: The struct to initialize.*/
+void theora_info_init(theora_info *_ci){
+  memset(_ci,0,sizeof(*_ci));
+}
+
+/*Zeroes a theora_info struct and releases the API wrapper attached to it, if
+   there is one.
+  _ci: The struct to clear.*/
+void theora_info_clear(theora_info *_ci){
+  th_api_wrapper *api;
+  /*Save the wrapper pointer first; the memset below erases it from _ci.*/
+  api=(th_api_wrapper *)_ci->codec_setup;
+  memset(_ci,0,sizeof(*_ci));
+  if(api!=NULL){
+    /*Let the wrapper release what it owns, then free the wrapper itself.*/
+    if(api->clear!=NULL)(*api->clear)(api);
+    _ogg_free(api);
+  }
+}
+
+/*Releases a theora_state by calling the clear entry of whichever dispatch
+   table(s) it carries, then clearing its info struct and zeroing the state.
+  _th: The state to clear.*/
+void theora_clear(theora_state *_th){
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  if(_th->internal_decode!=NULL){
+    (*((oc_state_dispatch_vtable *)_th->internal_decode)->clear)(_th);
+  }
+  /*The fields are re-read after each call: a clear entry may already have
+     zeroed the whole state (the decoder's, theora_decode_clear(), does), in
+     which case the remaining steps do nothing.*/
+  if(_th->internal_encode!=NULL){
+    (*((oc_state_dispatch_vtable *)_th->internal_encode)->clear)(_th);
+  }
+  if(_th->i!=NULL)theora_info_clear(_th->i);
+  memset(_th,0,sizeof(*_th));
+}
+
+/*Forwards a control request to the decoder or encoder that owns the state.
+  The decoder's dispatch table is preferred; the encoder's is used only when
+   the decoder's is absent.
+  _th:     The state to send the request to.
+  _req:    The request code.
+  _buf:    The buffer holding the request's data.
+  _buf_sz: The size of _buf.
+  Return: Whatever the selected control entry returns, or TH_EINVAL if the
+           state has no dispatch table at all.*/
+int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
+  oc_state_dispatch_vtable *vtbl;
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  vtbl=(oc_state_dispatch_vtable *)_th->internal_decode;
+  if(vtbl==NULL)vtbl=(oc_state_dispatch_vtable *)_th->internal_encode;
+  if(vtbl==NULL)return TH_EINVAL;
+  return (*vtbl->control)(_th,_req,_buf,_buf_sz);
+}
+
+/*Converts a granule position to a frame index via the decoder or encoder that
+   owns the state.
+  The decoder's dispatch table is preferred; the encoder's is used only when
+   the decoder's is absent.
+  _th: The state to query.
+  _gp: The granule position to convert.
+  Return: Whatever the selected granule_frame entry returns, or -1 if the
+           state has no dispatch table at all.*/
+ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
+  oc_state_dispatch_vtable *vtbl;
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  vtbl=(oc_state_dispatch_vtable *)_th->internal_decode;
+  if(vtbl==NULL)vtbl=(oc_state_dispatch_vtable *)_th->internal_encode;
+  if(vtbl==NULL)return -1;
+  return (*vtbl->granule_frame)(_th,_gp);
+}
+
+/*Converts a granule position to a time via the decoder or encoder that owns
+   the state.
+  The decoder's dispatch table is preferred; the encoder's is used only when
+   the decoder's is absent.
+  _th: The state to query.
+  _gp: The granule position to convert.
+  Return: Whatever the selected granule_time entry returns, or -1 if the
+           state has no dispatch table at all.*/
+double theora_granule_time(theora_state *_th, ogg_int64_t _gp){
+  oc_state_dispatch_vtable *vtbl;
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  vtbl=(oc_state_dispatch_vtable *)_th->internal_decode;
+  if(vtbl==NULL)vtbl=(oc_state_dispatch_vtable *)_th->internal_encode;
+  if(vtbl==NULL)return -1;
+  return (*vtbl->granule_time)(_th,_gp);
+}
+
+/*Fills in a th_info struct from a legacy theora_info struct.
+  _info: The struct to fill in.
+  _ci:   The legacy struct to read from.*/
+void oc_theora_info2th_info(th_info *_info,const theora_info *_ci){
+  _info->version_major=_ci->version_major;
+  _info->version_minor=_ci->version_minor;
+  _info->version_subminor=_ci->version_subminor;
+  /*The two structs name the sizes differently: the legacy width/height are
+     the new frame_width/frame_height, while the legacy
+     frame_width/frame_height are the new pic_width/pic_height.*/
+  _info->frame_width=_ci->width;
+  _info->frame_height=_ci->height;
+  _info->pic_width=_ci->frame_width;
+  _info->pic_height=_ci->frame_height;
+  _info->pic_x=_ci->offset_x;
+  _info->pic_y=_ci->offset_y;
+  _info->fps_numerator=_ci->fps_numerator;
+  _info->fps_denominator=_ci->fps_denominator;
+  _info->aspect_numerator=_ci->aspect_numerator;
+  _info->aspect_denominator=_ci->aspect_denominator;
+  /*Unrecognized color spaces map to TH_CS_UNSPECIFIED.*/
+  switch(_ci->colorspace){
+    case OC_CS_ITU_REC_470M:_info->colorspace=TH_CS_ITU_REC_470M;break;
+    case OC_CS_ITU_REC_470BG:_info->colorspace=TH_CS_ITU_REC_470BG;break;
+    default:_info->colorspace=TH_CS_UNSPECIFIED;break;
+  }
+  /*Unrecognized pixel formats map to TH_PF_RSVD.*/
+  switch(_ci->pixelformat){
+    case OC_PF_420:_info->pixel_fmt=TH_PF_420;break;
+    case OC_PF_422:_info->pixel_fmt=TH_PF_422;break;
+    case OC_PF_444:_info->pixel_fmt=TH_PF_444;break;
+    default:_info->pixel_fmt=TH_PF_RSVD;
+  }
+  _info->target_bitrate=_ci->target_bitrate;
+  _info->quality=_ci->quality;
+  /*Derive the shift from the forced keyframe frequency, capped at 31; a
+     frequency that is not positive gives a shift of 0.*/
+  _info->keyframe_granule_shift=_ci->keyframe_frequency_force>0?
+   OC_MINI(31,oc_ilog(_ci->keyframe_frequency_force-1)):0;
+}
+
+/*Legacy API entry point; forwards to th_packet_isheader().*/
+int theora_packet_isheader(ogg_packet *_op){
+  return th_packet_isheader(_op);
+}
+
+/*Legacy API entry point; forwards to th_packet_iskeyframe().*/
+int theora_packet_iskeyframe(ogg_packet *_op){
+  return th_packet_iskeyframe(_op);
+}
+
+/*Computes the granule shift implied by the forced keyframe frequency.
+  _ci: The info struct to read keyframe_frequency_force from.
+  Return: oc_ilog(keyframe_frequency_force-1).*/
+int theora_granule_shift(theora_info *_ci){
+  /*This breaks when keyframe_frequency_force is not positive or is larger than
+     2**31 (if your int is more than 32 bits), but that's what the original
+     function does.*/
+  return oc_ilog(_ci->keyframe_frequency_force-1);
+}
+
+/*The legacy comment functions below all forward to their th_comment_*()
+   counterparts, passing the theora_comment pointer as a th_comment pointer.*/
+
+/*Forwards to th_comment_init().*/
+void theora_comment_init(theora_comment *_tc){
+  th_comment_init((th_comment *)_tc);
+}
+
+/*Forwards to th_comment_query().*/
+char *theora_comment_query(theora_comment *_tc,char *_tag,int _count){
+  return th_comment_query((th_comment *)_tc,_tag,_count);
+}
+
+/*Forwards to th_comment_query_count().*/
+int theora_comment_query_count(theora_comment *_tc,char *_tag){
+  return th_comment_query_count((th_comment *)_tc,_tag);
+}
+
+/*Forwards to th_comment_clear().*/
+void theora_comment_clear(theora_comment *_tc){
+  th_comment_clear((th_comment *)_tc);
+}
+
+/*Forwards to th_comment_add().*/
+void theora_comment_add(theora_comment *_tc,char *_comment){
+  th_comment_add((th_comment *)_tc,_comment);
+}
+
+/*Forwards to th_comment_add_tag().*/
+void theora_comment_add_tag(theora_comment *_tc, char *_tag, char *_value){
+  th_comment_add_tag((th_comment *)_tc,_tag,_value);
+}

Copied: trunk/theora/lib/apiwrapper.h (from rev 16442, trunk/theora/lib/dec/apiwrapper.h)
===================================================================
--- trunk/theora/lib/apiwrapper.h	                        (rev 0)
+++ trunk/theora/lib/apiwrapper.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,54 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: apiwrapper.h 13596 2007-08-23 20:05:38Z tterribe $
+
+ ********************************************************************/
+
+#if !defined(_apiwrapper_H)
+# define _apiwrapper_H (1)
+# include <ogg/ogg.h>
+# include <theora/theora.h>
+# include "theora/theoradec.h"
+# include "theora/theoraenc.h"
+# include "internal.h"
+
+typedef struct th_api_wrapper th_api_wrapper;
+typedef struct th_api_info    th_api_info;
+
+/*Provide an entry point for the codec setup to clear itself in case we ever
+   want to break pieces off into a common base library shared by encoder and
+   decoder.
+  In addition, this makes several other pieces of the API wrapper cleaner.*/
+typedef void (*oc_setup_clear_func)(void *_ts);
+
+/*Generally only one of these pointers will be non-NULL in any given instance.
+  Technically we do not even really need this struct, since we should be able
+   to figure out which one from "context", but doing it this way makes sure we
+   don't flub it up.*/
+struct th_api_wrapper{
+  /*Called, when set, to release whatever the pointers below own before the
+     wrapper itself is freed.*/
+  oc_setup_clear_func  clear;
+  /*The codec setup data.*/
+  th_setup_info       *setup;
+  /*The decoder context, if this wrapper belongs to a decoder.*/
+  th_dec_ctx          *decode;
+  /*Presumably the encoder context, if this wrapper belongs to an encoder;
+     nothing in the decoder-side wrapper code touches it.*/
+  th_enc_ctx          *encode;
+};
+
+/*An API wrapper and a theora_info struct combined so that both can live in a
+   single allocation.*/
+struct th_api_info{
+  /*NOTE(review): this looks like it must stay the first member; a pointer to
+     it is what appears to get stored as codec_setup and later passed to
+     _ogg_free(), which only releases the whole block if the two addresses
+     coincide - confirm before reordering.*/
+  th_api_wrapper api;
+  theora_info    info;
+};
+
+
+void oc_theora_info2th_info(th_info *_info,const theora_info *_ci);
+
+#endif

Copied: trunk/theora/lib/bitpack.c (from rev 16442, trunk/theora/lib/dec/bitpack.c)
===================================================================
--- trunk/theora/lib/bitpack.c	                        (rev 0)
+++ trunk/theora/lib/bitpack.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,111 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008             *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id$
+
+ ********************************************************************/
+#include <string.h>
+#include <stdlib.h>
+#include "bitpack.h"
+
+/*We're 'MSb' endian; if we write a word but read individual bits,
+   then we'll read the MSb first.*/
+
+/*Prepares a pack buffer for reading from a byte array.
+  All state is zeroed, so the bit window starts out empty.
+  _b:     The pack buffer to initialize.
+  _buf:   The first byte to read.
+  _bytes: The number of bytes available at _buf.*/
+void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){
+  memset(_b,0,sizeof(*_b));
+  _b->ptr=_buf;
+  _b->stop=_buf+_bytes;
+}
+
+/*Pulls more input into the bit window.
+  The updated read pointer and bit count are stored back in _b, but the
+   updated window is only returned; the caller decides whether to store it.
+  _b:    The pack buffer to refill.
+  _bits: The number of bits the caller is about to use.
+  Return: The refilled window.*/
+static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
+  const unsigned char *ptr;
+  const unsigned char *stop;
+  oc_pb_window         window;
+  int                  available;
+  window=_b->window;
+  available=_b->bits;
+  ptr=_b->ptr;
+  stop=_b->stop;
+  /*Append whole bytes below the bits already present for as long as there is
+     room for another byte and input remains.*/
+  while(available<=OC_PB_WINDOW_SIZE-8&&ptr<stop){
+    available+=8;
+    window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
+  }
+  _b->ptr=ptr;
+  if(_bits>available){
+    if(ptr>=stop){
+      /*Out of input: flag it, and claim a huge number of bits so that later
+         reads are served from the window without refilling again.*/
+      _b->eof=1;
+      available=OC_LOTS_OF_BITS;
+    }
+    /*Otherwise the window is too full to take a whole byte; OR in the top
+       bits of the next byte without consuming it.*/
+    else window|=*ptr>>(available&7);
+  }
+  _b->bits=available;
+  return window;
+}
+
+/*Returns the next bit without consuming it, refilling the window first if it
+   is empty.
+  _b: The pack buffer to read from.
+  Return: The top bit of the window.*/
+int oc_pack_look1(oc_pack_buf *_b){
+  oc_pb_window window;
+  int          available;
+  window=_b->window;
+  available=_b->bits;
+  if(available<1)_b->window=window=oc_pack_refill(_b,1);
+  return window>>OC_PB_WINDOW_SIZE-1;
+}
+
+/*Discards one bit from the window.
+  No refill is done here.
+  _b: The pack buffer to advance.*/
+void oc_pack_adv1(oc_pack_buf *_b){
+  _b->window<<=1;
+  _b->bits--;
+}
+
+/*Reads and consumes a multi-bit value, most significant bit first.
+  Here we assume that 0<=_bits&&_bits<=32.
+  _b:    The pack buffer to read from.
+  _bits: The number of bits to read.
+  Return: The value read; 0 if _bits is 0.*/
+long oc_pack_read(oc_pack_buf *_b,int _bits){
+  oc_pb_window window;
+  int          available;
+  long         result;
+  window=_b->window;
+  available=_b->bits;
+  if(_bits==0)return 0;
+  if(available<_bits){
+    window=oc_pack_refill(_b,_bits);
+    available=_b->bits;
+  }
+  result=window>>OC_PB_WINDOW_SIZE-_bits;
+  available-=_bits;
+  /*The shift is done in two steps, presumably so that a single shift by the
+     full width of the window (undefined in C) can never be requested.*/
+  window<<=1;
+  window<<=_bits-1;
+  _b->bits=available;
+  _b->window=window;
+  return result;
+}
+
+/*Reads and consumes a single bit, refilling the window first if it is empty.
+  _b: The pack buffer to read from.
+  Return: The bit read.*/
+int oc_pack_read1(oc_pack_buf *_b){
+  oc_pb_window window;
+  int          available;
+  int          result;
+  window=_b->window;
+  available=_b->bits;
+  if(available<1){
+    window=oc_pack_refill(_b,1);
+    available=_b->bits;
+  }
+  result=window>>OC_PB_WINDOW_SIZE-1;
+  available--;
+  window<<=1;
+  _b->bits=available;
+  _b->window=window;
+  return result;
+}
+
+/*Reports how much input remains.
+  _b: The pack buffer to query.
+  Return: -1 once the end-of-input flag has been set; otherwise the number of
+           bytes not yet loaded into the window plus the number of whole bytes
+           still held in it.*/
+long oc_pack_bytes_left(oc_pack_buf *_b){
+  if(_b->eof)return -1;
+  return _b->stop-_b->ptr+(_b->bits>>3);
+}

Copied: trunk/theora/lib/bitpack.h (from rev 16442, trunk/theora/lib/dec/bitpack.h)
===================================================================
--- trunk/theora/lib/bitpack.h	                        (rev 0)
+++ trunk/theora/lib/bitpack.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,59 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008             *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $
+
+ ********************************************************************/
+#if !defined(_bitpack_H)
+# define _bitpack_H (1)
+# include <limits.h>
+
+
+
+typedef unsigned long      oc_pb_window;
+typedef struct oc_pack_buf oc_pack_buf;
+
+
+
+# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT)
+/*This is meant to be a large, positive constant that can still be efficiently
+   loaded as an immediate (on platforms like ARM, for example).
+  Even relatively modest values like 100 would work fine.*/
+# define OC_LOTS_OF_BITS (0x40000000)
+
+
+
+/*The state of a bit reader.*/
+struct oc_pack_buf{
+  /*Bits loaded from the input but not yet consumed.*/
+  oc_pb_window         window;
+  /*The next input byte to load.*/
+  const unsigned char *ptr;
+  /*The end of the input.*/
+  const unsigned char *stop;
+  /*The count of bits currently available in window.*/
+  int                  bits;
+  /*Non-zero once a read has run past the end of the input.*/
+  int                  eof;
+};
+
+void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes);
+int oc_pack_look1(oc_pack_buf *_b);
+void oc_pack_adv1(oc_pack_buf *_b);
+/*Here we assume 0<=_bits&&_bits<=32.*/
+long oc_pack_read(oc_pack_buf *_b,int _bits);
+int oc_pack_read1(oc_pack_buf *_b);
+/* returns -1 for read beyond EOF, or the number of whole bytes available */
+long oc_pack_bytes_left(oc_pack_buf *_b);
+
+/*These two functions are implemented locally in huffdec.c*/
+/*Read in bits without advancing the bitptr.
+  Here we assume 0<=_bits&&_bits<=32.*/
+/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/
+/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/
+
+#endif

Copied: trunk/theora/lib/dct.h (from rev 16442, trunk/theora/lib/dec/dct.h)
===================================================================
--- trunk/theora/lib/dct.h	                        (rev 0)
+++ trunk/theora/lib/dct.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,31 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+
+/*Definitions shared by the forward and inverse DCT transforms.*/
+#if !defined(_dct_H)
+# define _dct_H (1)
+
+/*cos(n*pi/16) (resp. sin(m*pi/16)) scaled by 65536.*/
+#define OC_C1S7 ((ogg_int32_t)64277)
+#define OC_C2S6 ((ogg_int32_t)60547)
+#define OC_C3S5 ((ogg_int32_t)54491)
+#define OC_C4S4 ((ogg_int32_t)46341)
+#define OC_C5S3 ((ogg_int32_t)36410)
+#define OC_C6S2 ((ogg_int32_t)25080)
+#define OC_C7S1 ((ogg_int32_t)12785)
+
+#endif

Copied: trunk/theora/lib/decapiwrapper.c (from rev 16442, trunk/theora/lib/dec/decapiwrapper.c)
===================================================================
--- trunk/theora/lib/decapiwrapper.c	                        (rev 0)
+++ trunk/theora/lib/decapiwrapper.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,193 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: decapiwrapper.c 13596 2007-08-23 20:05:38Z tterribe $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+#include "decint.h"
+#include "theora/theoradec.h"
+
+/*Releases everything a decoder-side API wrapper owns and zeroes it.
+  The wrapper struct itself is not freed here.
+  _api: The wrapper to clear.*/
+static void th_dec_api_clear(th_api_wrapper *_api){
+  if(_api->setup!=NULL)th_setup_free(_api->setup);
+  if(_api->decode!=NULL)th_decode_free(_api->decode);
+  memset(_api,0,sizeof(*_api));
+}
+
+/*The decoder's clear entry for the dispatch table: clears the attached info
+   struct, if any, and zeroes the whole state.
+  _td: The decoder state to clear.*/
+static void theora_decode_clear(theora_state *_td){
+  if(_td->i!=NULL)theora_info_clear(_td->i);
+  memset(_td,0,sizeof(*_td));
+}
+
+/*The decoder's control entry for the dispatch table: passes the request on to
+   th_decode_ctl() for the decoder context held in the state's API wrapper.
+  _td->i and its codec_setup are used without being checked.*/
+static int theora_decode_control(theora_state *_td,int _req,
+ void *_buf,size_t _buf_sz){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  return th_decode_ctl(api->decode,_req,_buf,_buf_sz);
+}
+
+/*The decoder's granule_frame entry for the dispatch table: passes the granule
+   position on to th_granule_frame() for the decoder context held in the
+   state's API wrapper.
+  _td->i and its codec_setup are used without being checked.*/
+static ogg_int64_t theora_decode_granule_frame(theora_state *_td,
+ ogg_int64_t _gp){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  return th_granule_frame(api->decode,_gp);
+}
+
+/*The decoder's granule_time entry for the dispatch table: passes the granule
+   position on to th_granule_time() for the decoder context held in the
+   state's API wrapper.
+  _td->i and its codec_setup are used without being checked.*/
+static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  return th_granule_time(api->decode,_gp);
+}
+
+/*The decoder's dispatch table; theora_decode_init() stores its address in
+   theora_state::internal_decode.
+  Entries are given in the order clear, control, granule_frame,
+   granule_time.*/
+static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={
+  (oc_state_clear_func)theora_decode_clear,
+  (oc_state_control_func)theora_decode_control,
+  (oc_state_granule_frame_func)theora_decode_granule_frame,
+  (oc_state_granule_time_func)theora_decode_granule_time,
+};
+
+/*Fills in a legacy theora_info struct from a th_info struct.
+  Only the fields listed here are written; anything else in _ci (codec_setup,
+   for instance) is left as it was.
+  _ci:   The legacy struct to fill in.
+  _info: The struct to read from.*/
+static void th_info2theora_info(theora_info *_ci,const th_info *_info){
+  _ci->version_major=_info->version_major;
+  _ci->version_minor=_info->version_minor;
+  _ci->version_subminor=_info->version_subminor;
+  /*The two structs name the sizes differently: the new
+     frame_width/frame_height are the legacy width/height, while the new
+     pic_width/pic_height are the legacy frame_width/frame_height.*/
+  _ci->width=_info->frame_width;
+  _ci->height=_info->frame_height;
+  _ci->frame_width=_info->pic_width;
+  _ci->frame_height=_info->pic_height;
+  _ci->offset_x=_info->pic_x;
+  _ci->offset_y=_info->pic_y;
+  _ci->fps_numerator=_info->fps_numerator;
+  _ci->fps_denominator=_info->fps_denominator;
+  _ci->aspect_numerator=_info->aspect_numerator;
+  _ci->aspect_denominator=_info->aspect_denominator;
+  /*Unrecognized color spaces map to OC_CS_UNSPECIFIED.*/
+  switch(_info->colorspace){
+    case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break;
+    case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break;
+    default:_ci->colorspace=OC_CS_UNSPECIFIED;break;
+  }
+  /*Unrecognized pixel formats map to OC_PF_RSVD.*/
+  switch(_info->pixel_fmt){
+    case TH_PF_420:_ci->pixelformat=OC_PF_420;break;
+    case TH_PF_422:_ci->pixelformat=OC_PF_422;break;
+    case TH_PF_444:_ci->pixelformat=OC_PF_444;break;
+    default:_ci->pixelformat=OC_PF_RSVD;
+  }
+  _ci->target_bitrate=_info->target_bitrate;
+  _ci->quality=_info->quality;
+  /*Shift an unsigned 32-bit one: oc_theora_info2th_info() produces shifts of
+     up to 31, and 1<<31 overflows a signed int.*/
+  _ci->keyframe_frequency_force=
+   (ogg_uint32_t)1<<_info->keyframe_granule_shift;
+}
+
+/*Sets up a legacy decoder state from an info struct whose headers have
+   already been parsed with theora_decode_header().
+  _td: The state to initialize.
+  _ci: The info struct the headers were decoded into.
+  Return: 0 on success.
+          OC_FAULT if _td or _ci is NULL, if _ci has no setup data attached,
+           or if memory could not be allocated.
+          OC_EINVAL if the decoder context could not be created.*/
+int theora_decode_init(theora_state *_td,theora_info *_ci){
+  th_api_info    *apiinfo;
+  th_api_wrapper *api;
+  th_info         info;
+  /*The setup data is reached through the API wrapper attached to _ci; without
+     one there is nothing to initialize from.
+    Fail the same way the other decode entry points do instead of
+     dereferencing a NULL pointer.*/
+  if(_td==NULL||_ci==NULL||_ci->codec_setup==NULL)return OC_FAULT;
+  api=(th_api_wrapper *)_ci->codec_setup;
+  /*Allocate our own combined API wrapper/theora_info struct.
+    We put them both in one malloc'd block so that when the API wrapper is
+     freed, the info struct goes with it.
+    This avoids having to figure out whether or not we need to free the info
+     struct in either theora_info_clear() or theora_clear().*/
+  apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo));
+  if(apiinfo==NULL)return OC_FAULT;
+  /*Make our own copy of the info struct, since its lifetime should be
+     independent of the one we were passed in.*/
+  apiinfo->info=*_ci;
+  /*Convert the info struct now instead of saving the one we decoded with
+     theora_decode_header(), since the user might have modified values (i.e.,
+     color space, aspect ratio, etc. can be specified from a higher level).
+    The user also might be doing something "clever" with the header packets if
+     they are not using an Ogg encapsulation.*/
+  oc_theora_info2th_info(&info,_ci);
+  /*Don't bother to copy the setup info; th_decode_alloc() makes its own copy
+     of the stuff it needs.*/
+  apiinfo->api.decode=th_decode_alloc(&info,api->setup);
+  if(apiinfo->api.decode==NULL){
+    _ogg_free(apiinfo);
+    return OC_EINVAL;
+  }
+  apiinfo->api.clear=(oc_setup_clear_func)th_dec_api_clear;
+  _td->internal_encode=NULL;
+  /*Provide entry points for ABI compatibility with old decoder shared libs.*/
+  _td->internal_decode=(void *)&OC_DEC_DISPATCH_VTBL;
+  _td->granulepos=0;
+  _td->i=&apiinfo->info;
+  /*The copied info struct still points at the caller's wrapper; point it at
+     the new one instead.*/
+  _td->i->codec_setup=&apiinfo->api;
+  return 0;
+}
+
+/*Feeds one header packet to the decoder through the legacy API.
+  _ci: The info struct to update; an API wrapper is attached to it on first
+        use.
+  _cc: The comment struct to update.
+  _op: The header packet to parse.
+  Return: 0 on success.
+          OC_FAULT if the API wrapper could not be allocated.
+          Otherwise the negative value returned by th_decode_headerin().*/
+int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){
+  th_api_wrapper *api;
+  th_info         info;
+  int             ret;
+  api=(th_api_wrapper *)_ci->codec_setup;
+  /*Allocate an API wrapper struct on demand, since it will not also include a
+     theora_info struct like the ones that are used in a theora_state struct.*/
+  if(api==NULL){
+    _ci->codec_setup=_ogg_calloc(1,sizeof(*api));
+    if(_ci->codec_setup==NULL)return OC_FAULT;
+    api=(th_api_wrapper *)_ci->codec_setup;
+    api->clear=(oc_setup_clear_func)th_dec_api_clear;
+  }
+  /*Convert from the theora_info struct instead of saving our own th_info
+     struct between calls.
+    The user might be doing something "clever" with the header packets if they
+     are not using an Ogg encapsulation, and we don't want to break this.*/
+  oc_theora_info2th_info(&info,_ci);
+  /*We rely on the fact that theora_comment and th_comment structures are
+     actually identical.
+    Take care not to change this fact unless you change the code here as
+     well!*/
+  ret=th_decode_headerin(&info,(th_comment *)_cc,&api->setup,_op);
+  /*We also rely on the fact that the error return code values are the same,
+    and that the implementations of these two functions return the same set of
+    them.
+   Note that theora_decode_header() really can return OC_NOTFORMAT, even
+    though it is not currently documented to do so.*/
+  if(ret<0)return ret;
+  /*Copy whatever the packet changed back into the caller's struct.
+    Any non-negative result is reported to the caller as 0.*/
+  th_info2theora_info(_ci,&info);
+  return 0;
+}
+
+/*Feeds one data packet to the decoder through the legacy API.
+  _td: The decoder state.
+  _op: The packet to decode.
+  Return: 0 on success, with _td->granulepos updated.
+          OC_FAULT if _td, its info struct or its API wrapper is missing.
+          OC_BADPACKET if th_decode_packetin() reports any error.*/
+int theora_decode_packetin(theora_state *_td,ogg_packet *_op){
+  th_api_wrapper *api;
+  ogg_int64_t     gp;
+  int             ret;
+  if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  ret=th_decode_packetin(api->decode,_op,&gp);
+  /*Every failure is reported as OC_BADPACKET, whatever the actual cause.*/
+  if(ret<0)return OC_BADPACKET;
+  _td->granulepos=gp;
+  return 0;
+}
+
+/*Retrieves the current decoded frame through the legacy API.
+  _td:  The decoder state.
+  _yuv: The buffer description to fill in; left untouched on failure.
+  Return: OC_FAULT if _td, its info struct, its API wrapper or its decoder
+           context is missing.
+          Otherwise the value returned by th_decode_ycbcr_out().*/
+int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
+  th_api_wrapper  *api;
+  th_dec_ctx      *decode;
+  th_ycbcr_buffer  buf;
+  int              ret;
+  if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  decode=(th_dec_ctx *)api->decode;
+  if(!decode)return OC_FAULT;
+  ret=th_decode_ycbcr_out(decode,buf);
+  if(ret>=0){
+    /*The legacy struct has a single set of chroma dimensions, which is taken
+       from plane 1.*/
+    _yuv->y_width=buf[0].width;
+    _yuv->y_height=buf[0].height;
+    _yuv->y_stride=buf[0].stride;
+    _yuv->uv_width=buf[1].width;
+    _yuv->uv_height=buf[1].height;
+    _yuv->uv_stride=buf[1].stride;
+    _yuv->y=buf[0].data;
+    _yuv->u=buf[1].data;
+    _yuv->v=buf[2].data;
+  }
+  return ret;
+}

Copied: trunk/theora/lib/decinfo.c (from rev 16442, trunk/theora/lib/dec/decinfo.c)
===================================================================
--- trunk/theora/lib/decinfo.c	                        (rev 0)
+++ trunk/theora/lib/decinfo.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,246 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "decint.h"
+
+
+
+/*Copies a run of octets out of the pack buffer into a byte array.
+  No checking is done to ensure the pack buffer contains enough data.
+  _opb: The pack buffer to read the octets from.
+  _buf: The byte array that receives the unpacked octets.
+  _len: The number of octets to unpack.*/
+static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){
+  /*One 8-bit read per output byte.*/
+  for(;_len>0;_len--)*_buf++=(char)oc_pack_read(_opb,8);
+}
+
+/*Reads four octets and assembles them, least significant octet first, into a
+   32-bit length.*/
+static long oc_unpack_length(oc_pack_buf *_opb){
+  long len;
+  int  shift;
+  len=0;
+  /*Each successive octet lands 8 bits higher than the one before it.*/
+  for(shift=0;shift<32;shift+=8)len|=oc_pack_read(_opb,8)<<shift;
+  return len;
+}
+
+static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){
+  long val;
+  /*Fills in _info from the fields of the info header, which are read in the
+     order they appear in the bitstream.
+    Returns 0 on success, TH_EVERSION if the bitstream version is newer than
+     we can parse, or TH_EBADHEADER if a field is inconsistent or reserved.
+    Check the codec bitstream version.*/
+  val=oc_pack_read(_opb,8);
+  _info->version_major=(unsigned char)val;
+  val=oc_pack_read(_opb,8);
+  _info->version_minor=(unsigned char)val;
+  val=oc_pack_read(_opb,8);
+  _info->version_subminor=(unsigned char)val;
+  /*verify we can parse this bitstream version.
+     We accept earlier minors and all subminors, by spec*/
+  if(_info->version_major>TH_VERSION_MAJOR||
+   _info->version_major==TH_VERSION_MAJOR&&
+   _info->version_minor>TH_VERSION_MINOR){
+    return TH_EVERSION;
+  }
+  /*Read the encoded frame description.
+    The frame dimensions are 16-bit fields that are scaled by 16 here; the
+     picture dimensions are 24-bit fields used as read.
+    Once everything has been read, we reject an empty frame, a picture region
+     that extends past the frame, and a zero frame rate numerator or
+     denominator.*/
+  val=oc_pack_read(_opb,16);
+  _info->frame_width=(ogg_uint32_t)val<<4;
+  val=oc_pack_read(_opb,16);
+  _info->frame_height=(ogg_uint32_t)val<<4;
+  val=oc_pack_read(_opb,24);
+  _info->pic_width=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,24);
+  _info->pic_height=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,8);
+  _info->pic_x=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,8);
+  _info->pic_y=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,32);
+  _info->fps_numerator=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,32);
+  _info->fps_denominator=(ogg_uint32_t)val;
+  if(_info->frame_width==0||_info->frame_height==0||
+   _info->pic_width+_info->pic_x>_info->frame_width||
+   _info->pic_height+_info->pic_y>_info->frame_height||
+   _info->fps_numerator==0||_info->fps_denominator==0){
+    return TH_EBADHEADER;
+  }
+  /*Note: The sense of pic_y is inverted in what we pass back to the
+     application compared to how it is stored in the bitstream.
+    This is because the bitstream uses a right-handed coordinate system, while
+     applications expect a left-handed one.*/
+  _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
+  val=oc_pack_read(_opb,24);
+  _info->aspect_numerator=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,24);
+  _info->aspect_denominator=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,8);
+  _info->colorspace=(th_colorspace)val;
+  val=oc_pack_read(_opb,24);
+  _info->target_bitrate=(int)val;
+  val=oc_pack_read(_opb,6);
+  _info->quality=(int)val;
+  val=oc_pack_read(_opb,5);
+  _info->keyframe_granule_shift=(int)val;
+  val=oc_pack_read(_opb,2);
+  _info->pixel_fmt=(th_pixel_fmt)val;
+  if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER;
+  val=oc_pack_read(_opb,3); /*The last three bits must all be zero.*/
+  if(val!=0||oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+  return 0;
+}
+
+/*Unpacks the comment header: the vendor string followed by a counted list of
+   length-prefixed user comments.
+  Every string is stored with a terminating NUL appended.
+  On failure, _tc->comments is set to the number of comment strings that were
+   actually allocated (presumably so that the caller's th_comment_clear() only
+   touches initialized entries; confirm against th_comment_clear()).
+  _opb: The pack buffer to read the header from.
+  _tc:  The comment structure to fill in.
+  Return: 0 on success, TH_EBADHEADER if a length is negative or larger than
+           the data left in the packet, or TH_EFAULT if an allocation fails.*/
+static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){
+  long len;
+  int  i;
+  /*Read the vendor string.*/
+  len=oc_unpack_length(_opb);
+  if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER;
+  _tc->vendor=_ogg_malloc((size_t)len+1);
+  if(_tc->vendor==NULL)return TH_EFAULT;
+  oc_unpack_octets(_opb,_tc->vendor,len);
+  _tc->vendor[len]='\0';
+  /*Read the user comments.
+    Each comment needs at least a 4-byte length, which bounds the count by the
+     data remaining in the packet.*/
+  _tc->comments=(int)oc_unpack_length(_opb);
+  len=_tc->comments;
+  if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){
+    _tc->comments=0;
+    return TH_EBADHEADER;
+  }
+  _tc->comment_lengths=(int *)_ogg_malloc(
+   _tc->comments*sizeof(_tc->comment_lengths[0]));
+  _tc->user_comments=(char **)_ogg_malloc(
+   _tc->comments*sizeof(_tc->user_comments[0]));
+  /*Do not write through a failed allocation.
+    A zero-byte request may legitimately return NULL, so NULL is only an error
+     when there is at least one comment to store.*/
+  if(_tc->comments>0&&
+   (_tc->comment_lengths==NULL||_tc->user_comments==NULL)){
+    _tc->comments=0;
+    return TH_EFAULT;
+  }
+  for(i=0;i<_tc->comments;i++){
+    len=oc_unpack_length(_opb);
+    if(len<0||len>oc_pack_bytes_left(_opb)){
+      _tc->comments=i;
+      return TH_EBADHEADER;
+    }
+    _tc->comment_lengths[i]=len;
+    _tc->user_comments[i]=_ogg_malloc((size_t)len+1);
+    if(_tc->user_comments[i]==NULL){
+      _tc->comments=i;
+      return TH_EFAULT;
+    }
+    oc_unpack_octets(_opb,_tc->user_comments[i],len);
+    _tc->user_comments[i][len]='\0';
+  }
+  return oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0;
+}
+
+/*Unpacks the body of the setup header: the quantizer tables first, then the
+   Huffman trees.
+  Return: The negative value from oc_quant_params_unpack() if it fails;
+           otherwise whatever oc_huff_trees_unpack() returns.*/
+static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){
+  int status;
+  status=oc_quant_params_unpack(_opb,&_setup->qinfo);
+  /*Only go on to the Huffman trees if the quantizer tables were accepted.*/
+  if(status>=0)status=oc_huff_trees_unpack(_opb,_setup->huff_tables);
+  return status;
+}
+
+static void oc_setup_clear(th_setup_info *_setup){ /*Clears the two members of
+   _setup (the quantizer parameters, then the Huffman trees) through their own
+   clear routines.
+  The structure itself is not freed here.*/
+  oc_quant_params_clear(&_setup->qinfo);
+  oc_huff_trees_clear(_setup->huff_tables);
+}
+
+/*Decodes a single header packet, dispatching on its packet type.
+  _opb:   The pack buffer to read the packet from.
+  _info:  The info structure; filled in by the info header.
+  _tc:    The comment structure; filled in by the comment header.
+          May be NULL until the comment header is reached.
+  _setup: Receives the newly allocated setup information once the setup header
+           has been decoded.
+          May be NULL until the setup header is reached.
+  _op:    The packet being decoded; only its b_o_s flag is examined here.
+  Return: 3, 2, or 1 after successfully decoding the info, comment, or setup
+           header, respectively; 0 when a data packet arrives after all three
+           headers have been decoded; a negative value on error.*/
+static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info,
+ th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){
+  char buffer[6];
+  long val;
+  int  packtype;
+  int  ret;
+  val=oc_pack_read(_opb,8);
+  packtype=(int)val;
+  /*If we're at a data packet and we have received all three headers, we're
+     done.
+    _tc and _setup are only validated by the headers that need them, so they
+     must be checked here before they are dereferenced.*/
+  if(!(packtype&0x80)&&_info->frame_width>0&&
+   _tc!=NULL&&_tc->vendor!=NULL&&_setup!=NULL&&*_setup!=NULL){
+    return 0;
+  }
+  /*Check the codec string.*/
+  oc_unpack_octets(_opb,buffer,6);
+  if(memcmp(buffer,"theora",6)!=0)return TH_ENOTFORMAT;
+  switch(packtype){
+    /*Codec info header.*/
+    case 0x80:{
+      /*This should be the first packet, and we should not already be
+         initialized.*/
+      if(!_op->b_o_s||_info->frame_width>0)return TH_EBADHEADER;
+      ret=oc_info_unpack(_opb,_info);
+      if(ret<0)th_info_clear(_info);
+      else ret=3;
+    }break;
+    /*Comment header.*/
+    case 0x81:{
+      if(_tc==NULL)return TH_EFAULT;
+      /*We should have already decoded the info header, and should not yet
+         have decoded the comment header.*/
+      if(_info->frame_width==0||_tc->vendor!=NULL)return TH_EBADHEADER;
+      ret=oc_comment_unpack(_opb,_tc);
+      if(ret<0)th_comment_clear(_tc);
+      else ret=2;
+    }break;
+    /*Codec setup header.*/
+    case 0x82:{
+      oc_setup_info *setup;
+      if(_tc==NULL||_setup==NULL)return TH_EFAULT;
+      /*We should have already decoded the info header and the comment header,
+         and should not yet have decoded the setup header.*/
+      if(_info->frame_width==0||_tc->vendor==NULL||*_setup!=NULL){
+        return TH_EBADHEADER;
+      }
+      setup=(oc_setup_info *)_ogg_calloc(1,sizeof(*setup));
+      if(setup==NULL)return TH_EFAULT;
+      ret=oc_setup_unpack(_opb,setup);
+      if(ret<0){
+        /*Nothing is handed back to the caller on failure.*/
+        oc_setup_clear(setup);
+        _ogg_free(setup);
+      }
+      else{
+        *_setup=setup;
+        ret=1;
+      }
+    }break;
+    default:{
+      /*We don't know what this header is.*/
+      return TH_EBADHEADER;
+    }break;
+  }
+  return ret;
+}
+
+
+/*Decodes one header packet.
+  This should be called repeatedly with the packets at the beginning of the
+   stream until it returns 0.
+  _info:  The info structure to fill in; must not be NULL.
+  _tc:    The comment structure to fill in.
+  _setup: Receives the setup information once the setup header is decoded.
+  _op:    The packet to decode; must not be NULL.
+  Return: TH_EBADHEADER if _op is NULL, TH_EFAULT if _info is NULL; otherwise
+           the result of oc_dec_headerin() on the contents of the packet.*/
+int th_decode_headerin(th_info *_info,th_comment *_tc,
+ th_setup_info **_setup,ogg_packet *_op){
+  oc_pack_buf opb;
+  if(_op==NULL)return TH_EBADHEADER;
+  if(_info==NULL)return TH_EFAULT;
+  oc_pack_readinit(&opb,_op->packet,_op->bytes);
+  return oc_dec_headerin(&opb,_info,_tc,_setup,_op);
+}
+
+/*Clears the contents of a setup structure and then frees the structure
+   itself.
+  Passing NULL is allowed and does nothing.*/
+void th_setup_free(th_setup_info *_setup){
+  if(_setup==NULL)return;
+  oc_setup_clear(_setup);
+  _ogg_free(_setup);
+}

Copied: trunk/theora/lib/decint.h (from rev 16442, trunk/theora/lib/dec/decint.h)
===================================================================
--- trunk/theora/lib/decint.h	                        (rev 0)
+++ trunk/theora/lib/decint.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,107 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <limits.h>
+#if !defined(_decint_H)
+# define _decint_H (1)
+# include "theora/theoradec.h"
+# include "internal.h"
+# include "bitpack.h"
+
+typedef struct th_setup_info oc_setup_info;
+typedef struct th_dec_ctx    oc_dec_ctx;
+
+# include "huffdec.h"
+# include "dequant.h"
+
+/*Constants for the packet-in state machine specific to the decoder.*/
+
+/*Next packet to read: Data packet.*/
+#define OC_PACKET_DATA (0)
+
+
+
+struct th_setup_info{
+  /*The Huffman codes.
+    One tree pointer per Huffman table, TH_NHUFFMAN_TABLES in all.*/
+  oc_huff_node      *huff_tables[TH_NHUFFMAN_TABLES];
+  /*The quantization parameters.
+    The decoder also takes its loop filter limits from here.*/
+  th_quant_info  qinfo;
+};
+
+
+
+struct th_dec_ctx{
+  /*Shared encoder/decoder state.*/
+  oc_theora_state      state;
+  /*Whether or not packets are ready to be emitted.
+    This takes on negative values while there are remaining header packets to
+     be emitted, reaches 0 when the codec is ready for input, and goes to 1
+     when a frame has been processed and a data packet is ready.*/
+  int                  packet_state;
+  /*Buffer in which to assemble packets.*/
+  oc_pack_buf          opb;
+  /*Huffman decode trees.*/
+  oc_huff_node        *huff_tables[TH_NHUFFMAN_TABLES];
+  /*The index of the first token in each plane for each coefficient.*/
+  ptrdiff_t            ti0[3][64];
+  /*The number of outstanding EOB runs at the start of each coefficient in each
+     plane.*/
+  ptrdiff_t            eob_runs[3][64];
+  /*The DCT token lists.
+    Allocated once, at decoder initialization, sized from the fragment
+     count.*/
+  unsigned char       *dct_tokens;
+  /*The extra bits associated with DCT tokens.*/
+  unsigned char       *extra_bits;
+  /*The number of dct tokens unpacked so far.*/
+  int                  dct_tokens_count;
+  /*The out-of-loop post-processing level.*/
+  int                  pp_level;
+  /*The DC scale used for out-of-loop deblocking.*/
+  int                  pp_dc_scale[64];
+  /*The sharpen modifier used for out-of-loop deringing.
+    One entry per qi value.*/
+  int                  pp_sharp_mod[64];
+  /*The DC quantization index of each block.*/
+  unsigned char       *dc_qis;
+  /*The variance of each block.*/
+  int                 *variances;
+  /*The storage for the post-processed frame buffer.*/
+  unsigned char       *pp_frame_data;
+  /*Whether or not the post-processed frame buffer has space for chroma.*/
+  int                  pp_frame_state;
+  /*The buffer used for the post-processed frame.
+    Note that this is _not_ guaranteed to have the same strides and offsets as
+     the reference frame buffers.*/
+  th_ycbcr_buffer      pp_frame_buf;
+  /*The striped decode callback function.*/
+  th_stripe_callback   stripe_cb;
+# if defined(HAVE_CAIRO)
+  /*Output metrics for debugging.
+    These fields only exist when building with HAVE_CAIRO defined.*/
+  int                  telemetry;
+  int                  telemetry_mbmode;
+  int                  telemetry_mv;
+  int                  telemetry_qi;
+  int                  telemetry_bits;
+  int                  telemetry_frame_bytes;
+  int                  telemetry_coding_bytes;
+  int                  telemetry_mode_bytes;
+  int                  telemetry_mv_bytes;
+  int                  telemetry_qi_bytes;
+  int                  telemetry_dc_bytes;
+  unsigned char       *telemetry_frame_data;
+# endif
+};
+
+#endif

Copied: trunk/theora/lib/decode.c (from rev 16442, trunk/theora/lib/dec/decode.c)
===================================================================
--- trunk/theora/lib/decode.c	                        (rev 0)
+++ trunk/theora/lib/decode.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,2928 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "decint.h"
+#if defined(OC_DUMP_IMAGES)
+# include <stdio.h>
+# include "png.h"
+#endif
+#if defined(HAVE_CAIRO)
+# include <cairo.h>
+#endif
+
+
+/*No post-processing.*/
+#define OC_PP_LEVEL_DISABLED  (0)
+/*Keep track of DC qi for each block only.*/
+#define OC_PP_LEVEL_TRACKDCQI (1)
+/*Deblock the luma plane.*/
+#define OC_PP_LEVEL_DEBLOCKY  (2)
+/*Dering the luma plane.*/
+#define OC_PP_LEVEL_DERINGY   (3)
+/*Stronger luma plane deringing.*/
+#define OC_PP_LEVEL_SDERINGY  (4)
+/*Deblock the chroma planes.*/
+#define OC_PP_LEVEL_DEBLOCKC  (5)
+/*Dering the chroma planes.*/
+#define OC_PP_LEVEL_DERINGC   (6)
+/*Stronger chroma plane deringing.*/
+#define OC_PP_LEVEL_SDERINGC  (7)
+/*Maximum valid post-processing level.*/
+#define OC_PP_LEVEL_MAX       (7)
+
+
+
+/*The mode alphabets for the various mode coding schemes.
+  Scheme 0 uses a custom alphabet, which is not stored in this table.*/
+static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
+  /*Last MV dominates */
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
+    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
+    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
+  },
+  /*No MV dominates.*/
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
+    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  /*Default ordering.*/
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  }
+};
+
+
+/*The original DCT tokens are extended and reordered during the construction of
+   the Huffman tables.
+  The extension means more bits can be read with fewer calls to the bitpacker
+   during the Huffman decoding process (at the cost of larger Huffman tables),
+   and fewer tokens require additional extra bits (reducing the average storage
+   per decoded token).
+  The revised ordering reveals essential information in the token value
+   itself; specifically, whether or not there are additional extra bits to read
+   and the parameter to which those extra bits are applied.
+  The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
+  The extra bits are added into code word at the bit position inferred from the
+   token value, giving the final code word from which all required parameters
+   are derived.
+  The number of EOBs and the leading zero run length can be extracted directly.
+  The coefficient magnitude is optionally negated before extraction, according
+   to a 'flip' bit.*/
+
+/*The number of additional extra bits that are decoded with each of the
+   internal DCT tokens.*/
+static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
+  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
+};
+
+/*Whether or not an internal token needs any additional extra bits.
+  This is true exactly for the tokens that have an entry in
+   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
+  The argument is parenthesized so that an expression can be passed safely.*/
+#define OC_DCT_TOKEN_NEEDS_MORE(token) \
+ ((token)<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
+  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
+
+/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
+#define OC_DCT_TOKEN_FAT_EOB (0)
+
+/*The number of EOBs to use for an end-of-frame token.
+  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
+   is not yet available everywhere; this should be equivalent.*/
+#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
+
+/*The location of the (6) run length bits in the code word.
+  These are placed at index 0 and given 8 bits (even though 6 would suffice)
+   because it may be faster to extract the lower byte on some platforms.*/
+#define OC_DCT_CW_RLEN_SHIFT (0)
+/*The location of the (12) EOB bits in the code word.*/
+#define OC_DCT_CW_EOB_SHIFT  (8)
+/*The location of the (1) flip bit in the code word.
+  This must be right under the magnitude bits.*/
+#define OC_DCT_CW_FLIP_BIT   (20)
+/*The location of the (11) token magnitude bits in the code word.
+  These must be last, and rely on a sign-extending right shift.*/
+#define OC_DCT_CW_MAG_SHIFT  (21)
+
+/*Pack the given fields into a code word.*/
+#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
+ ((_eobs)<<OC_DCT_CW_EOB_SHIFT| \
+ (_rlen)<<OC_DCT_CW_RLEN_SHIFT| \
+ (_flip)<<OC_DCT_CW_FLIP_BIT| \
+ (_mag)-(_flip)<<OC_DCT_CW_MAG_SHIFT)
+
+/*A special code word value that signals the end of the frame (a long EOB run
+   of zero).*/
+#define OC_DCT_CW_FINISH (0)
+
+/*The position at which to insert the extra bits in the code word.
+  We use this formulation because Intel has no useful cmov.
+  A real architecture would probably do better with two of those.
+  This translates to 11 instructions(!), and is _still_ faster than either a
+   table lookup (just barely) or the naive double-ternary implementation (which
+   gcc translates to a jump and a cmov).
+  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
+   you want to make one of the other shifts zero.*/
+#define OC_DCT_TOKEN_EB_POS(_token) \
+ ((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT&-((_token)<2)) \
+ +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
+
+/*The code words for each internal token.
+  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
+   order.*/
+static const ogg_int32_t OC_DCT_CODE_WORD[92]={
+  /*These tokens require additional extra bits for the EOB count.*/
+  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
+  OC_DCT_CW_FINISH,
+  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
+  OC_DCT_CW_PACK(16, 0,  0,0),
+  /*These tokens require additional extra bits for the magnitude.*/
+  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 13,0),
+  OC_DCT_CW_PACK( 0, 0, 13,1),
+  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 21,0),
+  OC_DCT_CW_PACK( 0, 0, 21,1),
+  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 37,0),
+  OC_DCT_CW_PACK( 0, 0, 37,1),
+  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 69,0),
+  OC_DCT_CW_PACK( 0, 0,325,0),
+  OC_DCT_CW_PACK( 0, 0, 69,1),
+  OC_DCT_CW_PACK( 0, 0,325,1),
+  /*These tokens require additional extra bits for the run length.*/
+  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0,10, +1,0),
+  OC_DCT_CW_PACK( 0,10, -1,0),
+  /*OC_DCT_ZRL_TOKEN (6 extra bits)
+    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
+  OC_DCT_CW_PACK( 0, 0,  0,1),
+  /*The remaining tokens require no additional extra bits.*/
+  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 1, 0,  0,0),
+  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 2, 0,  0,0),
+  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 3, 0,  0,0),
+  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
+  OC_DCT_CW_PACK( 0, 1, +1,0),
+  OC_DCT_CW_PACK( 0, 1, -1,0),
+  OC_DCT_CW_PACK( 0, 2, +1,0),
+  OC_DCT_CW_PACK( 0, 2, -1,0),
+  OC_DCT_CW_PACK( 0, 3, +1,0),
+  OC_DCT_CW_PACK( 0, 3, -1,0),
+  OC_DCT_CW_PACK( 0, 4, +1,0),
+  OC_DCT_CW_PACK( 0, 4, -1,0),
+  OC_DCT_CW_PACK( 0, 5, +1,0),
+  OC_DCT_CW_PACK( 0, 5, -1,0),
+  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 1, +2,0),
+  OC_DCT_CW_PACK( 0, 1, +3,0),
+  OC_DCT_CW_PACK( 0, 1, -2,0),
+  OC_DCT_CW_PACK( 0, 1, -3,0),
+  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 6, +1,0),
+  OC_DCT_CW_PACK( 0, 7, +1,0),
+  OC_DCT_CW_PACK( 0, 8, +1,0),
+  OC_DCT_CW_PACK( 0, 9, +1,0),
+  OC_DCT_CW_PACK( 0, 6, -1,0),
+  OC_DCT_CW_PACK( 0, 7, -1,0),
+  OC_DCT_CW_PACK( 0, 8, -1,0),
+  OC_DCT_CW_PACK( 0, 9, -1,0),
+  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 2, +2,0),
+  OC_DCT_CW_PACK( 0, 3, +2,0),
+  OC_DCT_CW_PACK( 0, 2, +3,0),
+  OC_DCT_CW_PACK( 0, 3, +3,0),
+  OC_DCT_CW_PACK( 0, 2, -2,0),
+  OC_DCT_CW_PACK( 0, 3, -2,0),
+  OC_DCT_CW_PACK( 0, 2, -3,0),
+  OC_DCT_CW_PACK( 0, 3, -3,0),
+  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
+    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
+  OC_DCT_CW_PACK( 0, 0,  0,1),
+  OC_DCT_CW_PACK( 0, 1,  0,0),
+  OC_DCT_CW_PACK( 0, 2,  0,0),
+  OC_DCT_CW_PACK( 0, 3,  0,0),
+  OC_DCT_CW_PACK( 0, 4,  0,0),
+  OC_DCT_CW_PACK( 0, 5,  0,0),
+  OC_DCT_CW_PACK( 0, 6,  0,0),
+  OC_DCT_CW_PACK( 0, 7,  0,0),
+  /*OC_ONE_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, +1,0),
+  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, -1,0),
+  /*OC_TWO_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, +2,0),
+  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, -2,0),
+  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
+  OC_DCT_CW_PACK( 0, 0, +3,0),
+  OC_DCT_CW_PACK( 0, 0, -3,0),
+  OC_DCT_CW_PACK( 0, 0, +4,0),
+  OC_DCT_CW_PACK( 0, 0, -4,0),
+  OC_DCT_CW_PACK( 0, 0, +5,0),
+  OC_DCT_CW_PACK( 0, 0, -5,0),
+  OC_DCT_CW_PACK( 0, 0, +6,0),
+  OC_DCT_CW_PACK( 0, 0, -6,0),
+  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 0, +7,0),
+  OC_DCT_CW_PACK( 0, 0, +8,0),
+  OC_DCT_CW_PACK( 0, 0, -7,0),
+  OC_DCT_CW_PACK( 0, 0, -8,0),
+  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 0, +9,0),
+  OC_DCT_CW_PACK( 0, 0,+10,0),
+  OC_DCT_CW_PACK( 0, 0,+11,0),
+  OC_DCT_CW_PACK( 0, 0,+12,0),
+  OC_DCT_CW_PACK( 0, 0, -9,0),
+  OC_DCT_CW_PACK( 0, 0,-10,0),
+  OC_DCT_CW_PACK( 0, 0,-11,0),
+  OC_DCT_CW_PACK( 0, 0,-12,0),
+  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 8, 0,  0,0),
+  OC_DCT_CW_PACK( 9, 0,  0,0),
+  OC_DCT_CW_PACK(10, 0,  0,0),
+  OC_DCT_CW_PACK(11, 0,  0,0),
+  OC_DCT_CW_PACK(12, 0,  0,0),
+  OC_DCT_CW_PACK(13, 0,  0,0),
+  OC_DCT_CW_PACK(14, 0,  0,0),
+  OC_DCT_CW_PACK(15, 0,  0,0),
+  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 4, 0,  0,0),
+  OC_DCT_CW_PACK( 5, 0,  0,0),
+  OC_DCT_CW_PACK( 6, 0,  0,0),
+  OC_DCT_CW_PACK( 7, 0,  0,0),
+};
+
+
+
+/*Decodes one super block run length.
+  A prefix of 1 bits selects the range, and the bits after the prefix give the
+   offset within that range:
+       Codeword            Run Length
+     0                       1
+     10x                     2-3
+     110x                    4-5
+     1110xx                  6-9
+     11110xxx                10-17
+     111110xxxx              18-33
+     111111xxxxxxxxxxxx      34-4129*/
+static int oc_sb_run_unpack(oc_pack_buf *_opb){
+  long code;
+  int  base;
+  /*0: a run of exactly one.*/
+  code=oc_pack_read1(_opb);
+  if(code==0)return 1;
+  /*Two more bits settle the 10x code, or get us as far as 110.*/
+  code=oc_pack_read(_opb,2);
+  if(!(code&2))return 2+(int)code;
+  if(!(code&1)){
+    code=oc_pack_read1(_opb);
+    return 4+(int)code;
+  }
+  /*We have seen 111; the next three bits select among the longer codes.*/
+  code=oc_pack_read(_opb,3);
+  if(!(code&4))return 6+(int)code;
+  if(!(code&2)){
+    /*11110x: the bit already read is the top bit of the 3-bit offset.*/
+    base=10+((code&1)<<2);
+    code=oc_pack_read(_opb,2);
+    return base+(int)code;
+  }
+  if(!(code&1)){
+    code=oc_pack_read(_opb,4);
+    return 18+(int)code;
+  }
+  code=oc_pack_read(_opb,12);
+  return 34+(int)code;
+}
+
+/*Decodes one block run length.
+  A prefix of 1 bits selects the range, and the bits after the prefix give the
+   offset within that range:
+     Codeword             Run Length
+     0x                      1-2
+     10x                     3-4
+     110x                    5-6
+     1110xx                  7-10
+     11110xx                 11-14
+     11111xxxx               15-30*/
+static int oc_block_run_unpack(oc_pack_buf *_opb){
+  long code;
+  long low;
+  code=oc_pack_read(_opb,2);
+  if(!(code&2))return 1+(int)code;
+  if(!(code&1)){
+    code=oc_pack_read1(_opb);
+    return 3+(int)code;
+  }
+  /*We have seen 11; two more bits settle 110x, or get us as far as 1110.*/
+  code=oc_pack_read(_opb,2);
+  if(!(code&2))return 5+(int)code;
+  if(!(code&1)){
+    code=oc_pack_read(_opb,2);
+    return 7+(int)code;
+  }
+  /*We have seen 1111; the next bit picks between the last two codes, and the
+     two bits read along with it already belong to the offset.*/
+  code=oc_pack_read(_opb,3);
+  if(!(code&4))return 11+code;
+  low=oc_pack_read(_opb,2);
+  return 15+((code&3)<<2)+low;
+}
+
+
+
+/*Initializes a decoder context from the decoded info and setup headers.
+  If a step fails, the pieces that were already set up are cleared again
+   before returning.
+  _dec:   The decoder context to initialize.
+  _info:  The stream parameters from the info header.
+  _setup: The Huffman trees and quantization parameters from the setup header.
+  Return: 0 on success, the negative value reported by oc_state_init() or
+           oc_huff_trees_copy() if one of them fails, or TH_EFAULT if the token
+           buffer cannot be allocated.*/
+static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
+ const th_setup_info *_setup){
+  int qti;
+  int pli;
+  int qi;
+  int ret;
+  ret=oc_state_init(&_dec->state,_info,3);
+  if(ret<0)return ret;
+  ret=oc_huff_trees_copy(_dec->huff_tables,
+   (const oc_huff_node *const *)_setup->huff_tables);
+  if(ret<0){
+    oc_state_clear(&_dec->state);
+    return ret;
+  }
+  /*For each fragment, allocate one byte for every DCT coefficient token, plus
+     one byte for extra-bits for each token, plus one more byte for the long
+     EOB run, just in case it's the very last token and has a run length of
+     one.*/
+  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
+   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
+  if(_dec->dct_tokens==NULL){
+    oc_huff_trees_clear(_dec->huff_tables);
+    oc_state_clear(&_dec->state);
+    return TH_EFAULT;
+  }
+  /*Point each dequantization table at its backing storage.*/
+  for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
+    _dec->state.dequant_tables[qi][pli][qti]=
+     _dec->state.dequant_table_data[qi][pli][qti];
+  }
+  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
+   &_setup->qinfo);
+  /*Derive the sharpen modifier for each qi from four coefficients (12, 17, 18,
+     and 24) of every table for that qi, with the luma plane counted twice.
+    The tables must be indexed [qi][pli][qti], the same order used to set them
+     up above; any other order sums entries from the wrong tables.*/
+  for(qi=0;qi<64;qi++){
+    int qsum;
+    qsum=0;
+    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+      qsum+=_dec->state.dequant_tables[qi][pli][qti][12]+
+       _dec->state.dequant_tables[qi][pli][qti][17]+
+       _dec->state.dequant_tables[qi][pli][qti][18]+
+       _dec->state.dequant_tables[qi][pli][qti][24]<<(pli==0);
+    }
+    _dec->pp_sharp_mod[qi]=-(qsum>>11);
+  }
+  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
+   sizeof(_dec->state.loop_filter_limits));
+  /*Post-processing starts out disabled, with none of its buffers allocated
+     and no stripe callback registered.*/
+  _dec->pp_level=OC_PP_LEVEL_DISABLED;
+  _dec->dc_qis=NULL;
+  _dec->variances=NULL;
+  _dec->pp_frame_data=NULL;
+  _dec->stripe_cb.ctx=NULL;
+  _dec->stripe_cb.stripe_decoded=NULL;
+#if defined(HAVE_CAIRO)
+  _dec->telemetry=0;
+  _dec->telemetry_bits=0;
+  _dec->telemetry_qi=0;
+  _dec->telemetry_mbmode=0;
+  _dec->telemetry_mv=0;
+  _dec->telemetry_frame_data=NULL;
+#endif
+  return 0;
+}
+
+static void oc_dec_clear(oc_dec_ctx *_dec){ /*Frees every buffer owned by
+   _dec, then clears its Huffman trees and the shared codec state.
+  _dec itself is not freed here.*/
+#if defined(HAVE_CAIRO)
+  _ogg_free(_dec->telemetry_frame_data);
+#endif
+  _ogg_free(_dec->pp_frame_data);
+  _ogg_free(_dec->variances);
+  _ogg_free(_dec->dc_qis);
+  _ogg_free(_dec->dct_tokens);
+  oc_huff_trees_clear(_dec->huff_tables);
+  oc_state_clear(&_dec->state);
+}
+
+
+static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
+  long val;
+  /*Parses the frame header from _dec->opb, storing the frame type and the
+     list of qi values (between 1 and 3 of them) in _dec->state.
+    Returns 0 on success, TH_EBADPACKET if this is not a data packet, or
+     TH_EIMPL if the unused keyframe configuration bits are not all zero.
+    Check to make sure this is a data packet.*/
+  val=oc_pack_read1(&_dec->opb);
+  if(val!=0)return TH_EBADPACKET;
+  /*Read in the frame type (I or P).*/
+  val=oc_pack_read1(&_dec->opb);
+  _dec->state.frame_type=(int)val;
+  /*Read in the qi list.
+    Each 6-bit qi value is followed by a 1-bit flag saying whether another one
+     follows, except for the third, which is always the last.*/
+  val=oc_pack_read(&_dec->opb,6);
+  _dec->state.qis[0]=(unsigned char)val;
+  val=oc_pack_read1(&_dec->opb);
+  if(!val)_dec->state.nqis=1;
+  else{
+    val=oc_pack_read(&_dec->opb,6);
+    _dec->state.qis[1]=(unsigned char)val;
+    val=oc_pack_read1(&_dec->opb);
+    if(!val)_dec->state.nqis=2;
+    else{
+      val=oc_pack_read(&_dec->opb,6);
+      _dec->state.qis[2]=(unsigned char)val;
+      _dec->state.nqis=3;
+    }
+  }
+  if(_dec->state.frame_type==OC_INTRA_FRAME){
+    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
+      Most of the other unused bits in the VP3 headers were eliminated.
+      I don't know why these remain.*/
+    /*I wanted to eliminate wasted bits, but not all config wiggle room
+       --Monty.*/
+    val=oc_pack_read(&_dec->opb,3);
+    if(val!=0)return TH_EIMPL;
+  }
+  return 0;
+}
+
+/*Mark all fragments as coded and in OC_MODE_INTRA.
+  This also builds up the coded fragment list (in coded order), and clears the
+   uncoded fragment list.
+  It does not update the coded macro block list nor the super block flags, as
+   those are not used when decoding INTRA frames.
+  The per-plane counts in ncoded_fragis[] and the overall count in
+   ntotal_coded_fragis are updated to match the list.*/
+static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  oc_fragment       *frags;
+  ptrdiff_t         *coded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          prev_ncoded_fragis;
+  unsigned           nsbs;
+  unsigned           sbi;
+  int                pli;
+  coded_fragis=_dec->state.coded_fragis;
+  prev_ncoded_fragis=ncoded_fragis=0;
+  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+  sb_flags=_dec->state.sb_flags;
+  frags=_dec->state.frags;
+  sbi=nsbs=0;
+  for(pli=0;pli<3;pli++){
+    nsbs+=_dec->state.fplanes[pli].nsbs; /*sbi carries over between planes.*/
+    for(;sbi<nsbs;sbi++){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
+        int bi;
+        for(bi=0;bi<4;bi++){
+          ptrdiff_t fragi;
+          fragi=sb_maps[sbi][quadi][bi];
+          if(fragi>=0){ /*Negative map entries are skipped.*/
+            frags[fragi].coded=1;
+            frags[fragi].mb_mode=OC_MODE_INTRA;
+            coded_fragis[ncoded_fragis++]=fragi;
+          }
+        }
+      }
+    }
+    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+    prev_ncoded_fragis=ncoded_fragis;
+  }
+  _dec->state.ntotal_coded_fragis=ncoded_fragis;
+}
+
+/*Decodes the bit flags indicating whether each super block is partially coded
+   or not.
+  The flags are run-length coded: an initial flag bit is followed by a series
+   of run lengths, with the flag value toggling after each run.
+  After a maximum-length run, the next flag value is read explicitly instead.
+  The coded_fully flag of every super block is reset to 0 along the way.
+  Return: The number of partially coded super blocks.*/
+static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
+  oc_sb_flags *sb_flags;
+  unsigned     nsbs;
+  unsigned     sbi;
+  unsigned     npartial;
+  unsigned     run_count;
+  long         val;
+  int          flag;
+  val=oc_pack_read1(&_dec->opb);
+  flag=(int)val;
+  sb_flags=_dec->state.sb_flags;
+  nsbs=_dec->state.nsbs;
+  sbi=npartial=0;
+  while(sbi<nsbs){
+    int full_run;
+    run_count=oc_sb_run_unpack(&_dec->opb);
+    full_run=run_count>=4129; /*4129 is the longest run that can be coded.*/
+    do{
+      sb_flags[sbi].coded_partially=flag;
+      sb_flags[sbi].coded_fully=0;
+      npartial+=flag;
+      sbi++;
+    }
+    while(--run_count>0&&sbi<nsbs); /*A run is cut short at the last block.*/
+    if(full_run&&sbi<nsbs){
+      val=oc_pack_read1(&_dec->opb);
+      flag=(int)val;
+    }
+    else flag=!flag;
+  }
+  /*TODO: run_count should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+  return npartial;
+}
+
+/*Decodes the bit flags for whether or not each non-partially-coded super
+   block is fully coded or not.
+  This function should only be called if there is at least one
+   non-partially-coded super block.
+  Partially coded super blocks are skipped: they do not consume any part of a
+   run, and their coded_fully flag is left as it was.
+  _dec: The decoder context.
+        Bits are read from its opb, and the coded_fully flags in its
+         state.sb_flags array are updated.
+  This function does not return a value.*/
+static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
+  oc_sb_flags *sb_flags;
+  unsigned     nsbs;
+  unsigned     sbi;
+  unsigned     run_count;
+  long         val;
+  int          flag;
+  sb_flags=_dec->state.sb_flags;
+  nsbs=_dec->state.nsbs;
+  /*Skip partially coded super blocks.
+    This scan is not bounded by nsbs: it relies on the precondition that at
+     least one super block is not partially coded.*/
+  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
+  val=oc_pack_read1(&_dec->opb);
+  flag=(int)val;
+  do{
+    int full_run;
+    run_count=oc_sb_run_unpack(&_dec->opb);
+    full_run=run_count>=4129;/*Presumably 4129 is the longest run that
+                                oc_sb_run_unpack() can return; TODO confirm.*/
+    for(;sbi<nsbs;sbi++){
+      if(sb_flags[sbi].coded_partially)continue;
+      if(run_count--<=0)break;/*Run exhausted: sbi stays on this super block
+                                 so that the next run starts with it.*/
+      sb_flags[sbi].coded_fully=flag;
+    }
+    if(full_run&&sbi<nsbs){/*After a run of 4129 or more, the next flag value
+                              is read explicitly instead of being toggled.*/
+      val=oc_pack_read1(&_dec->opb);
+      flag=(int)val;
+    }
+    else flag=!flag;
+  }
+  while(sbi<nsbs);
+  /*TODO: run_count should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+}
+
+/*Unpacks the coded/uncoded flag of every fragment in the frame.
+  The super block level flags are decoded first.
+  Fragments of fully coded super blocks are coded, fragments of super blocks
+   that are neither fully nor partially coded are not, and the fragments of
+   partially coded super blocks take their flags from a run-length coded bit
+   string whose run lengths are read with oc_block_run_unpack().
+  Coded fragments are appended to the front of state.coded_fragis, while
+   uncoded ones are stored backwards, starting just below index state.nfrags
+   of that same array.
+  The per-plane and total coded fragment counts are updated as well.*/
+static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  oc_fragment       *frags;
+  ptrdiff_t         *coded_list;
+  ptrdiff_t         *uncoded_end;
+  ptrdiff_t          ncoded;
+  ptrdiff_t          nuncoded;
+  ptrdiff_t          plane_start;
+  unsigned           npartial;
+  unsigned           sb_end;
+  unsigned           sbi;
+  int                run_left;
+  int                bit;
+  int                pli;
+  npartial=oc_dec_partial_sb_flags_unpack(_dec);
+  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
+  /*The block-level bit string is only present when some super block is
+     partially coded.
+    The value read is inverted up front, because it is flipped again when the
+     first run length is fetched below.*/
+  bit=0;
+  if(npartial>0)bit=!(int)oc_pack_read1(&_dec->opb);
+  frags=_dec->state.frags;
+  sb_flags=_dec->state.sb_flags;
+  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+  coded_list=_dec->state.coded_fragis;
+  uncoded_end=coded_list+_dec->state.nfrags;
+  ncoded=0;
+  nuncoded=0;
+  plane_start=0;
+  run_left=0;
+  sbi=0;
+  sb_end=0;
+  for(pli=0;pli<3;pli++){
+    sb_end+=_dec->state.fplanes[pli].nsbs;
+    while(sbi<sb_end){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++){
+        int bi;
+        if(!(sb_flags[sbi].quad_valid&1<<quadi))continue;
+        for(bi=0;bi<4;bi++){
+          ptrdiff_t fragi;
+          int       is_coded;
+          fragi=sb_maps[sbi][quadi][bi];
+          if(fragi<0)continue;
+          if(sb_flags[sbi].coded_fully)is_coded=1;
+          else if(sb_flags[sbi].coded_partially){
+            /*Start a new run (with the opposite flag value) whenever the
+               current one is used up.*/
+            if(run_left<=0){
+              run_left=oc_block_run_unpack(&_dec->opb);
+              bit=!bit;
+            }
+            run_left--;
+            is_coded=bit;
+          }
+          else is_coded=0;
+          frags[fragi].coded=is_coded;
+          if(is_coded)coded_list[ncoded++]=fragi;
+          else{
+            /*The uncoded list grows downwards from the end of the array.*/
+            nuncoded++;
+            uncoded_end[-nuncoded]=fragi;
+          }
+        }
+      }
+      sbi++;
+    }
+    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
+    plane_start=ncoded;
+  }
+  _dec->state.ntotal_coded_fragis=ncoded;
+  /*TODO: run_left should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+}
+
+
+/*The signature of a function that reads one macro block mode index from the
+   packet; see oc_vlc_mode_unpack() and oc_clc_mode_unpack().*/
+typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb);
+
+/*Reads a mode index stored as a run of 1 bits ended by a 0 bit.
+  The terminating 0 bit is omitted after seven 1 bits.
+  _opb: The pack buffer to read from.
+  Return: The number of 1 bits read, from 0 to 7.*/
+static int oc_vlc_mode_unpack(oc_pack_buf *_opb){
+  int nones;
+  nones=0;
+  /*No bit is read once seven 1 bits have been seen.*/
+  while(nones<7&&oc_pack_read1(_opb))nones++;
+  return nones;
+}
+
+/*Reads a mode index stored as a plain 3-bit value.
+  _opb: The pack buffer to read from.
+  Return: The 3-bit value read.*/
+static int oc_clc_mode_unpack(oc_pack_buf *_opb){
+  return (int)oc_pack_read(_opb,3);
+}
+
+/*Reads the coding mode of each macro block of an INTER frame.
+  A 3-bit scheme number comes first.
+  Scheme 0 transmits its own alphabet (one 3-bit position for each of the
+   OC_NMODES modes), any other scheme selects row scheme-1 of
+   OC_MODE_ALPHABETS, and scheme 7 additionally switches the mode indices
+   from the variable-length code to the fixed 3-bit code.
+  Macro blocks whose stored mode is OC_MODE_INVALID are left alone.
+  A macro block with no coded luma fragment is given OC_MODE_INTER_NOMV
+   without reading anything from the packet.*/
+static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
+  unsigned char        custom_alphabet[8];
+  const unsigned char *mode_alphabet;
+  oc_mode_unpack_func  read_mode;
+  const oc_mb_map     *mb_maps;
+  const oc_fragment   *frags;
+  signed char         *mb_modes;
+  size_t               nmbs;
+  size_t               mbi;
+  int                  scheme;
+  scheme=(int)oc_pack_read(&_dec->opb,3);
+  if(scheme!=0)mode_alphabet=OC_MODE_ALPHABETS[scheme-1];
+  else{
+    int rank;
+    /*Pre-fill the table so that every slot holds a usable mode.
+      A stream that fails to list each position exactly once is likely
+       corrupt, but this way we still decode something instead of using
+       uninitialized entries.*/
+    for(rank=OC_NMODES;rank-->0;)custom_alphabet[rank]=OC_MODE_INTER_NOMV;
+    for(rank=0;rank<OC_NMODES;rank++){
+      long slot;
+      slot=oc_pack_read(&_dec->opb,3);
+      custom_alphabet[slot]=OC_MODE_ALPHABETS[6][rank];
+    }
+    mode_alphabet=custom_alphabet;
+  }
+  read_mode=scheme==7?oc_clc_mode_unpack:oc_vlc_mode_unpack;
+  frags=_dec->state.frags;
+  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
+  mb_modes=_dec->state.mb_modes;
+  nmbs=_dec->state.nmbs;
+  for(mbi=0;mbi<nmbs;mbi++){
+    int bi;
+    if(mb_modes[mbi]==OC_MODE_INVALID)continue;
+    /*Look for the first coded luma fragment of this macro block.*/
+    bi=0;
+    while(bi<4&&!frags[mb_maps[mbi][0][bi]].coded)bi++;
+    if(bi>=4){
+      /*None was found: INTER_NOMV is forced.*/
+      mb_modes[mbi]=OC_MODE_INTER_NOMV;
+    }
+    else{
+      /*One was found: a mode is present in the packet.*/
+      mb_modes[mbi]=mode_alphabet[(*read_mode)(&_dec->opb)];
+    }
+  }
+}
+
+
+/*The signature of a function that reads one motion vector component from the
+   packet; see oc_vlc_mv_comp_unpack() and oc_clc_mv_comp_unpack().*/
+typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb);
+
+/*Reads one motion vector component stored with the variable-length code.
+  A 3-bit prefix selects the magnitude class:
+   0, 1 and 2 stand for the values 0, 1 and -1 directly;
+   3 and 4 give the magnitudes 2 and 3, followed by one sign bit;
+   5, 6 and 7 give the base magnitudes 4, 8 and 16, followed by 3, 4 and 5
+    more bits respectively, whose lowest bit is the sign and whose remaining
+    bits are added to the base magnitude.
+  _opb: The pack buffer to read from.
+  Return: The signed component value.*/
+static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){
+  long prefix;
+  long extra;
+  int  mag;
+  int  sign;
+  prefix=oc_pack_read(_opb,3);
+  if(prefix==0)return 0;
+  if(prefix==1)return 1;
+  if(prefix==2)return -1;
+  if(prefix==3||prefix==4){
+    mag=(int)(prefix-1);
+    extra=oc_pack_read1(_opb);
+  }
+  else{
+    mag=1<<prefix-3;
+    extra=oc_pack_read(_opb,prefix-2);
+    mag+=(int)(extra>>1);
+    extra&=1;
+  }
+  /*sign is 0 or -1; adding it and then XORing with it negates mag when the
+     sign bit is set, and leaves mag unchanged otherwise.*/
+  sign=-(int)extra;
+  return (mag+sign)^sign;
+}
+
+/*Reads one motion vector component stored as a fixed 6-bit value: the lowest
+   bit is the sign, and the bits above it are the magnitude.
+  _opb: The pack buffer to read from.
+  Return: The signed component value.*/
+static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){
+  int raw;
+  int mag;
+  raw=(int)oc_pack_read(_opb,6);
+  mag=raw>>1;
+  return raw&1?-mag:mag;
+}
+
+/*Unpacks the list of motion vectors for INTER frames, and propagates the macro
+   block modes and motion vectors to the individual fragments.
+  A leading bit selects the motion vector component coding: the fixed-length
+   reader (oc_clc_mv_comp_unpack()) if it is set, and the variable-length one
+   (oc_vlc_mv_comp_unpack()) otherwise.
+  Only coded fragments have their mb_mode and frag_mvs entries written.
+  _dec: The decoder context.*/
+static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
+  const oc_mb_map        *mb_maps;
+  const signed char      *mb_modes;
+  oc_set_chroma_mvs_func  set_chroma_mvs;
+  oc_mv_comp_unpack_func  mv_comp_unpack;
+  oc_fragment            *frags;
+  oc_mv                  *frag_mvs;
+  const unsigned char    *map_idxs;
+  int                     map_nidxs;
+  oc_mv                   last_mv[2];/*[0] is the most recent "last" MV, and
+                                        [1] is the one before it.*/
+  oc_mv                   cbmvs[4];
+  size_t                  nmbs;
+  size_t                  mbi;
+  long                    val;
+  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
+  val=oc_pack_read1(&_dec->opb);
+  mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
+  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
+  memset(last_mv,0,sizeof(last_mv));
+  frags=_dec->state.frags;
+  frag_mvs=_dec->state.frag_mvs;
+  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
+  mb_modes=_dec->state.mb_modes;
+  nmbs=_dec->state.nmbs;
+  for(mbi=0;mbi<nmbs;mbi++){
+    int          mb_mode;
+    mb_mode=mb_modes[mbi];
+    if(mb_mode!=OC_MODE_INVALID){
+      oc_mv        mbmv;
+      ptrdiff_t    fragi;
+      int          coded[13];/*The map indices of the coded fragments, with one
+                                extra slot for the -1 terminator used in the
+                                4MV case.*/
+      int          codedi;
+      int          ncoded;
+      int          mapi;
+      int          mapii;
+      /*Search for at least one coded fragment.
+        Each map index packs the plane (mapi>>2) and the block within the
+         plane (mapi&3).*/
+      ncoded=mapii=0;
+      do{
+        mapi=map_idxs[mapii];
+        fragi=mb_maps[mbi][mapi>>2][mapi&3];
+        if(frags[fragi].coded)coded[ncoded++]=mapi;
+      }
+      while(++mapii<map_nidxs);
+      if(ncoded<=0)continue;/*Nothing is coded in this macro block, so no
+                               motion vectors are read for it.*/
+      switch(mb_mode){
+        case OC_MODE_INTER_MV_FOUR:{
+          oc_mv       lbmvs[4];
+          int         bi;
+          /*Mark the tail of the list, so we don't accidentally go past it.*/
+          coded[ncoded]=-1;
+          for(bi=codedi=0;bi<4;bi++){
+            if(coded[codedi]==bi){
+              codedi++;
+              fragi=mb_maps[mbi][0][bi];
+              frags[fragi].mb_mode=mb_mode;
+              lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+              lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+              memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi]));
+            }
+            else lbmvs[bi][0]=lbmvs[bi][1]=0;/*Uncoded luma blocks get a zero
+                                               MV and read nothing.*/
+          }
+          if(codedi>0){/*The MV of the last coded luma block becomes the new
+                          last_mv[0].*/
+            memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+            memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0]));
+          }
+          if(codedi<ncoded){/*The remaining entries are in planes other than
+                               plane 0; their MVs are derived from the four
+                               luma MVs.*/
+            (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
+            for(;codedi<ncoded;codedi++){
+              mapi=coded[codedi];
+              bi=mapi&3;
+              fragi=mb_maps[mbi][mapi>>2][bi];
+              frags[fragi].mb_mode=mb_mode;
+              memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi]));
+            }
+          }
+        }break;
+        case OC_MODE_INTER_MV:{
+          memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+          mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+          mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+        }break;
+        case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break;
+        case OC_MODE_INTER_MV_LAST2:{/*Uses last_mv[1] and swaps the two
+                                        history entries.*/
+          memcpy(mbmv,last_mv[1],sizeof(mbmv));
+          memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+          memcpy(last_mv[0],mbmv,sizeof(last_mv[0]));
+        }break;
+        case OC_MODE_GOLDEN_MV:{/*An explicit MV is read, but last_mv is left
+                                   untouched.*/
+          mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+          mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+        }break;
+        default:memset(mbmv,0,sizeof(mbmv));break;
+      }
+      /*4MV mode fills in the fragments itself.
+        For all other modes we can use this common code.*/
+      if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+        for(codedi=0;codedi<ncoded;codedi++){
+          mapi=coded[codedi];
+          fragi=mb_maps[mbi][mapi>>2][mapi&3];
+          frags[fragi].mb_mode=mb_mode;
+          memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv));
+        }
+      }
+    }
+  }
+}
+
+/*Unpacks the qi index (qii) of every coded fragment.
+  When the frame has a single qi value, every coded fragment gets a qii of 0
+   and nothing is read from the packet.
+  Otherwise the indices are decoded in up to two passes that use the same
+   binary run-length scheme as the super block coded bits:
+   the first pass stores 0 or 1 in each coded fragment's qii, and the second
+   pass, made only when the frame has three qi values and the first pass
+   produced at least one non-zero qii, adds a further 0 or 1 to each non-zero
+   qii.*/
+static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
+  oc_fragment     *frags;
+  const ptrdiff_t *coded_fragis;
+  ptrdiff_t        ncoded;
+  ptrdiff_t        ci;
+  int              nnonzero;
+  int              run_left;
+  int              long_run;
+  int              bit;
+  ncoded=_dec->state.ntotal_coded_fragis;
+  if(ncoded<=0)return;
+  coded_fragis=_dec->state.coded_fragis;
+  frags=_dec->state.frags;
+  if(_dec->state.nqis==1){
+    /*Only one qi value is in use for this frame, so there is nothing to
+       decode.*/
+    for(ci=ncoded;ci-->0;)frags[coded_fragis[ci]].qii=0;
+    return;
+  }
+  /*First pass: a qii of zero versus a non-zero qii.*/
+  bit=(int)oc_pack_read1(&_dec->opb);
+  nnonzero=0;
+  ci=0;
+  while(ci<ncoded){
+    run_left=oc_sb_run_unpack(&_dec->opb);
+    long_run=run_left>=4129;
+    do{
+      frags[coded_fragis[ci]].qii=bit;
+      ci++;
+      nnonzero+=bit;
+      run_left--;
+    }
+    while(run_left>0&&ci<ncoded);
+    /*A run of 4129 or more is followed by an explicit bit; shorter runs
+       simply alternate.*/
+    if(long_run&&ci<ncoded)bit=(int)oc_pack_read1(&_dec->opb);
+    else bit=!bit;
+  }
+  /*TODO: run_left should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+  /*The second pass is only needed when there are 3 qi values for this frame
+     and at least one fragment came out of the first pass non-zero.*/
+  if(_dec->state.nqis!=3||nnonzero<=0)return;
+  /*Second pass: skip the leading qii==0 fragments, then run-length decode
+     over the non-zero ones only.*/
+  ci=0;
+  while(frags[coded_fragis[ci]].qii==0)ci++;
+  bit=(int)oc_pack_read1(&_dec->opb);
+  do{
+    run_left=oc_sb_run_unpack(&_dec->opb);
+    long_run=run_left>=4129;
+    while(ci<ncoded){
+      ptrdiff_t fragi;
+      fragi=coded_fragis[ci];
+      if(frags[fragi].qii!=0){
+        /*When the run is used up, stay on this fragment for the next run.*/
+        if(run_left--<=0)break;
+        frags[fragi].qii+=bit;
+      }
+      ci++;
+    }
+    if(long_run&&ci<ncoded)bit=(int)oc_pack_read1(&_dec->opb);
+    else bit=!bit;
+  }
+  while(ci<ncoded);
+  /*TODO: run_left should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+}
+
+
+
+/*Unpacks the DC coefficient tokens.
+  Unlike when unpacking the AC coefficient tokens, we actually need to decode
+   the DC coefficient values now so that we can do DC prediction.
+  The raw tokens (and their extra bits) are appended to _dec->dct_tokens, and
+   the decoded value is stored in the dc field of each coded fragment.
+  _dec:        The decoder context.
+  _huff_idxs:  The index of the Huffman table to use: entry 0 is used for
+                color plane 0, and entry 1 for color planes 1 and 2.
+  _ntoks_left: The number of tokens left to be decoded in each color plane for
+                each coefficient.
+               This is updated as EOB tokens and zero run tokens are decoded.
+  Return: The length of any outstanding EOB run.*/
+static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
+ ptrdiff_t _ntoks_left[3][64]){
+  unsigned char   *dct_tokens;
+  oc_fragment     *frags;
+  const ptrdiff_t *coded_fragis;
+  ptrdiff_t        ncoded_fragis;
+  ptrdiff_t        fragii;
+  ptrdiff_t        eobs;
+  ptrdiff_t        ti;
+  int              pli;
+  dct_tokens=_dec->dct_tokens;
+  frags=_dec->state.frags;
+  coded_fragis=_dec->state.coded_fragis;
+  ncoded_fragis=fragii=eobs=ti=0;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t run_counts[64];
+    ptrdiff_t eob_count;
+    ptrdiff_t eobi;
+    int       rli;
+    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
+    memset(run_counts,0,sizeof(run_counts));
+    _dec->eob_runs[pli][0]=eobs;/*Record the EOB run and token offset in
+                                   effect at the start of this plane.*/
+    _dec->ti0[pli][0]=ti;
+    /*Continue any previous EOB run, if there was one.*/
+    eobi=eobs;
+    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
+    eob_count=eobi;
+    eobs-=eobi;
+    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
+    while(fragii<ncoded_fragis){
+      int token;
+      int cw;
+      int eb;
+      int skip;
+      token=oc_huff_token_decode(&_dec->opb,
+       _dec->huff_tables[_huff_idxs[pli+1>>1]]);/*pli+1>>1 is 0 for plane 0
+                                                    and 1 for planes 1 and 2.*/
+      dct_tokens[ti++]=(unsigned char)token;
+      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+        eb=(int)oc_pack_read(&_dec->opb,
+         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
+        dct_tokens[ti++]=(unsigned char)eb;
+        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
+        eb<<=OC_DCT_TOKEN_EB_POS(token);
+      }
+      else eb=0;
+      cw=OC_DCT_CODE_WORD[token]+eb;
+      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;/*The 12-bit EOB run field of the
+                                            code word.*/
+      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
+      if(eobs){
+        eobi=OC_MINI(eobs,ncoded_fragis-fragii);/*Only the part of the run
+                                                   that fits in this plane is
+                                                   consumed here.*/
+        eob_count+=eobi;
+        eobs-=eobi;
+        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
+      }
+      else{
+        int coeff;
+        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
+        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
+        if(skip)coeff=0;/*A token with a non-zero run length leaves the DC
+                           value at zero.*/
+        run_counts[skip]++;
+        frags[coded_fragis[fragii++]].dc=coeff;
+      }
+    }
+    /*Add the total EOB count to the longest run length.*/
+    run_counts[63]+=eob_count;
+    /*And convert the run_counts array to a moment table.*/
+    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
+    /*Finally, subtract off the number of coefficients that have been
+       accounted for by runs started in this coefficient.*/
+    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
+  }
+  _dec->dct_tokens_count=ti;
+  return eobs;
+}
+
+/*Unpacks the AC coefficient tokens.
+  This can completely discard coefficient values while unpacking, and so is
+   somewhat simpler than unpacking the DC coefficient tokens.
+  The raw tokens (and their extra bits) are appended to _dec->dct_tokens.
+  _dec:        The decoder context.
+  _zzi:        The index of the coefficient whose tokens are unpacked.
+               oc_dec_residual_tokens_unpack() calls this once for each index
+                from 1 to 63, in increasing order.
+  _huff_idxs:  The index of the Huffman table to use: entry 0 is used for
+                color plane 0, and entry 1 for color planes 1 and 2.
+  _ntoks_left: The number of tokens left to be decoded in each color plane for
+                each coefficient.
+               This is updated as EOB tokens and zero run tokens are decoded.
+  _eobs:       The length of any outstanding EOB run from previous
+                coefficients.
+  Return: The length of any outstanding EOB run.
+  NOTE(review): The value returned is the ptrdiff_t _eobs, but the declared
+   return type is int; confirm that it always fits.*/
+static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
+ ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
+  unsigned char *dct_tokens;
+  ptrdiff_t      ti;
+  int            pli;
+  dct_tokens=_dec->dct_tokens;
+  ti=_dec->dct_tokens_count;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t run_counts[64];
+    ptrdiff_t eob_count;
+    size_t    ntoks_left;
+    size_t    ntoks;
+    int       rli;
+    _dec->eob_runs[pli][_zzi]=_eobs;/*Record the EOB run and token offset in
+                                       effect at the start of this plane.*/
+    _dec->ti0[pli][_zzi]=ti;
+    ntoks_left=_ntoks_left[pli][_zzi];
+    memset(run_counts,0,sizeof(run_counts));
+    eob_count=0;
+    ntoks=0;
+    while(ntoks+_eobs<ntoks_left){/*Stop once the pending EOB run covers
+                                     everything left in this plane.*/
+      int token;
+      int cw;
+      int eb;
+      int skip;
+      ntoks+=_eobs;
+      eob_count+=_eobs;
+      token=oc_huff_token_decode(&_dec->opb,
+       _dec->huff_tables[_huff_idxs[pli+1>>1]]);/*pli+1>>1 is 0 for plane 0
+                                                    and 1 for planes 1 and 2.*/
+      dct_tokens[ti++]=(unsigned char)token;
+      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+        eb=(int)oc_pack_read(&_dec->opb,
+         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
+        dct_tokens[ti++]=(unsigned char)eb;
+        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
+        eb<<=OC_DCT_TOKEN_EB_POS(token);
+      }
+      else eb=0;
+      cw=OC_DCT_CODE_WORD[token]+eb;
+      skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+      _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;/*The 12-bit EOB run field of the
+                                             code word.*/
+      if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
+      if(_eobs==0){
+        run_counts[skip]++;
+        ntoks++;
+      }
+    }
+    /*Add the portion of the last EOB run actually used by this coefficient.*/
+    eob_count+=ntoks_left-ntoks;
+    /*And remove it from the remaining EOB count.*/
+    _eobs-=ntoks_left-ntoks;
+    /*Add the total EOB count to the longest run length.*/
+    run_counts[63]+=eob_count;
+    /*And convert the run_counts array to a moment table.*/
+    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
+    /*Finally, subtract off the number of coefficients that have been
+       accounted for by runs started in this coefficient.*/
+    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
+  }
+  _dec->dct_tokens_count=ti;
+  return _eobs;
+}
+
+/*Tokens describing the DCT coefficients that belong to each fragment are
+   stored in the bitstream grouped by coefficient, not by fragment.
+
+  This means that we either decode all the tokens in order, building up a
+   separate coefficient list for each fragment as we go, and then go back and
+   do the iDCT on each fragment, or we have to create separate lists of tokens
+   for each coefficient, so that we can pull the next token required off the
+   head of the appropriate list when decoding a specific fragment.
+
+  The former was VP3's choice, and it meant 2*w*h extra storage for all the
+   decoded coefficient values.
+
+  We take the second option, which lets us store just one to three bytes per
+   token (generally far fewer than the number of coefficients, due to EOB
+   tokens and zero runs), and which requires us to only maintain a counter for
+   each of the 64 coefficients, instead of a counter for every fragment to
+   determine where the next token goes.
+
+  We actually use 3 counters per coefficient, one for each color plane, so we
+   can decode all color planes simultaneously.
+  This lets color conversion, etc., be done as soon as a full MCU (one or
+   two super block rows) is decoded, while the image data is still in cache.*/
+
+/*Unpacks all of the DCT tokens of a frame.
+  Two 4-bit Huffman table indices are read and the DC tokens are unpacked with
+   them.
+  Two more 4-bit indices are then read for the AC tokens; coefficients 1 to 63
+   are unpacked in four groups (ending before coefficients 6, 15, 28 and 64),
+   and 16 is added to both indices at the start of each group.
+  Any EOB run still outstanding after one coefficient is carried into the
+   next.*/
+static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
+  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
+  ptrdiff_t  ntoks_left[3][64];
+  int        huff_idxs[2];
+  ptrdiff_t  eob_run;
+  int        pli;
+  int        zzi;
+  int        hgi;
+  /*To begin with, every coded fragment of a plane is still owed a token for
+     every coefficient.*/
+  for(pli=0;pli<3;pli++){
+    for(zzi=0;zzi<64;zzi++){
+      ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
+    }
+  }
+  /*DC tokens.*/
+  huff_idxs[0]=(int)oc_pack_read(&_dec->opb,4);
+  huff_idxs[1]=(int)oc_pack_read(&_dec->opb,4);
+  _dec->eob_runs[0][0]=0;
+  eob_run=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
+#if defined(HAVE_CAIRO)
+  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+  /*AC tokens.*/
+  huff_idxs[0]=(int)oc_pack_read(&_dec->opb,4);
+  huff_idxs[1]=(int)oc_pack_read(&_dec->opb,4);
+  zzi=1;
+  for(hgi=1;hgi<5;hgi++){
+    int zzi_end;
+    huff_idxs[0]+=16;
+    huff_idxs[1]+=16;
+    zzi_end=OC_HUFF_LIST_MAX[hgi];
+    while(zzi<zzi_end){
+      eob_run=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eob_run);
+      zzi++;
+    }
+  }
+  /*TODO: eob_run should be exactly zero, or 4096 or greater.
+    The second case occurs when an EOB run of size zero is encountered, which
+     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
+    If neither of these conditions holds, then a warning should be issued.*/
+}
+
+/*Allocates, updates or releases the buffers used for post-processing,
+   according to the current value of _dec->pp_level.
+  _dec: The decoder context.
+  Return: 0 if the post-processing buffers are set up for this frame, or 1 if
+           post-processing is not to be done: it is disabled, only the DC
+           quantization indices are being tracked, tracking could not be
+           started on this frame, or an allocation failed.*/
+static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
+  /*pp_level 0: disabled; free any memory used and return*/
+  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
+    if(_dec->dc_qis!=NULL){
+      _ogg_free(_dec->dc_qis);
+      _dec->dc_qis=NULL;
+      _ogg_free(_dec->variances);
+      _dec->variances=NULL;
+      _ogg_free(_dec->pp_frame_data);
+      _dec->pp_frame_data=NULL;
+    }
+    return 1;
+  }
+  if(_dec->dc_qis==NULL){
+    /*If we haven't been tracking DC quantization indices, there's no point in
+       starting now.*/
+    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
+    _dec->dc_qis=(unsigned char *)_ogg_malloc(
+     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
+    if(_dec->dc_qis==NULL)return 1;/*Allocation failed.*/
+    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
+  }
+  else{
+    unsigned char   *dc_qis;
+    const ptrdiff_t *coded_fragis;
+    ptrdiff_t        ncoded_fragis;
+    ptrdiff_t        fragii;
+    unsigned char    qi0;
+    /*Update the DC quantization index of each coded block.*/
+    dc_qis=_dec->dc_qis;
+    coded_fragis=_dec->state.coded_fragis;
+    ncoded_fragis=_dec->state.ncoded_fragis[0]+
+     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
+    qi0=(unsigned char)_dec->state.qis[0];
+    for(fragii=0;fragii<ncoded_fragis;fragii++){
+      dc_qis[coded_fragis[fragii]]=qi0;
+    }
+  }
+  /*pp_level 1: Stop after updating DC quantization indices.*/
+  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
+    if(_dec->variances!=NULL){
+      _ogg_free(_dec->variances);
+      _dec->variances=NULL;
+      _ogg_free(_dec->pp_frame_data);
+      _dec->pp_frame_data=NULL;
+    }
+    return 1;
+  }
+  if(_dec->variances==NULL){
+    size_t frame_sz;
+    size_t c_sz;
+    int    c_w;
+    int    c_h;
+    frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
+    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);/*Halved
+     when bit 0 of pixel_fmt is clear.*/
+    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);/*Halved
+     when bit 1 of pixel_fmt is clear.*/
+    c_sz=c_w*(size_t)c_h;
+    /*Allocate space for the chroma planes, even if we're not going to use
+       them; this simplifies allocation state management, though it may waste
+       memory on the few systems that don't overcommit pages.*/
+    frame_sz+=c_sz<<1;
+    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
+     frame_sz*sizeof(_dec->pp_frame_data[0]));
+    _dec->variances=(int *)_ogg_malloc(
+     _dec->state.nfrags*sizeof(_dec->variances[0]));
+    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){/*If either
+     allocation failed, release both so the two pointers stay in step.*/
+      _ogg_free(_dec->pp_frame_data);
+      _dec->pp_frame_data=NULL;
+      _ogg_free(_dec->variances);
+      _dec->variances=NULL;
+      return 1;
+    }
+    /*Force an update of the PP buffer pointers.*/
+    _dec->pp_frame_state=0;
+  }
+  /*Update the PP buffer pointers if necessary.
+    pp_frame_state is 1 when only the luma plane is set up, 2 when all three
+     planes are, and 0 when the pointers must be rebuilt.*/
+  if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
+    if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
+      /*If chroma processing is disabled, just use the PP luma plane.
+        The stride is negative, so data is set to the start of the last row
+         stored in pp_frame_data.*/
+      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
+      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
+      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
+      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
+       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
+    }
+    else{
+      size_t y_sz;
+      size_t c_sz;
+      int    c_w;
+      int    c_h;
+      /*Otherwise, set up pointers to all three PP planes.
+        The planes are laid out back to back in pp_frame_data, in plane
+         order.*/
+      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
+      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
+      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
+      c_sz=c_w*(size_t)c_h;
+      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
+      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
+      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
+      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
+      _dec->pp_frame_buf[1].width=c_w;
+      _dec->pp_frame_buf[1].height=c_h;
+      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
+      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
+      _dec->pp_frame_buf[2].width=c_w;
+      _dec->pp_frame_buf[2].height=c_h;
+      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
+      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
+      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
+    }
+    _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
+  }
+  /*If we're not processing chroma, copy the reference frame's chroma planes.
+    Only the two plane descriptors are copied, not the pixel data.*/
+  if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
+    memcpy(_dec->pp_frame_buf+1,
+     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
+     sizeof(_dec->pp_frame_buf[1])*2);
+  }
+  return 0;
+}
+
+
+/*The working state of the main decoding pipeline for one frame.
+  It is filled in by oc_dec_pipeline_init(); all arrays of size 3 in the first
+   dimension are indexed by color plane.*/
+typedef struct{
+  int                 bounding_values[256];/*Loop filter bounding values.*/
+  ptrdiff_t           ti[3][64];/*Token indices, per plane and coefficient;
+                                   initialized from the decoder's ti0.*/
+  ptrdiff_t           eob_runs[3][64];/*EOB run counts, per plane and
+                                         coefficient; initialized from the
+                                         decoder's eob_runs.*/
+  const ptrdiff_t    *coded_fragis[3];/*Per-plane position in the coded
+                                         fragment list.*/
+  const ptrdiff_t    *uncoded_fragis[3];/*Per-plane position in the uncoded
+                                           fragment list.*/
+  ptrdiff_t           ncoded_fragis[3];/*Presumably the number of coded
+                                          fragments in the current MCU; TODO
+                                          confirm.*/
+  ptrdiff_t           nuncoded_fragis[3];/*Presumably the number of uncoded
+                                            fragments in the current MCU; TODO
+                                            confirm.*/
+  const ogg_uint16_t *dequant[3][3][2];/*Dequantization tables, indexed by
+                                          plane, qii and qti.*/
+  int                 fragy0[3];/*First fragment row of the current MCU.*/
+  int                 fragy_end[3];/*End fragment row of the current MCU (not
+                                      itself processed).*/
+  int                 pred_last[3][3];/*Previous DC predictors, per plane and
+                                         reference frame.*/
+  int                 mcu_nvfrags;/*4 or 8, depending on bit 1 of the pixel
+                                     format.*/
+  int                 loop_filter;/*Non-zero if the loop filter is enabled.*/
+  int                 pp_level;/*The post-processing level in effect for this
+                                  frame.*/
+}oc_dec_pipeline_state;
+
+
+
+/*Prepares the pipeline state used to decode one frame.
+  _dec:  The decoder context.
+         Post-processing buffers may be allocated or freed, and pp_frame_buf
+          is overwritten when post-processing is unavailable.
+  _pipe: The pipeline state to fill in.*/
+static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe){
+  const ptrdiff_t *coded_ptr;
+  const ptrdiff_t *uncoded_ptr;
+  int              pli;
+  int              qii;
+  int              qti;
+  /*An MCU is 4 fragment rows tall, or 8 when chroma is sub-sampled in the
+     vertical direction (two super block rows of Y' are then needed for each
+     super block row of Cb and Cr).*/
+  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
+  /*Start from the token offsets and EOB run counts recorded while the tokens
+     were unpacked.*/
+  memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
+  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
+  /*Give each plane its own pointer into the coded and uncoded fragment
+     lists.
+    The uncoded pointer starts at the end of the shared array and moves down
+     by the number of uncoded fragments in each plane.*/
+  coded_ptr=_dec->state.coded_fragis;
+  uncoded_ptr=coded_ptr+_dec->state.nfrags;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t nplane_coded;
+    nplane_coded=_dec->state.ncoded_fragis[pli];
+    _pipe->coded_fragis[pli]=coded_ptr;
+    _pipe->uncoded_fragis[pli]=uncoded_ptr;
+    coded_ptr+=nplane_coded;
+    uncoded_ptr+=nplane_coded-_dec->state.fplanes[pli].nfrags;
+  }
+  /*Gather the dequantization tables for the qi values used by this frame.*/
+  for(qii=0;qii<_dec->state.nqis;qii++){
+    for(pli=0;pli<3;pli++){
+      for(qti=0;qti<2;qti++){
+        _pipe->dequant[pli][qii][qti]=
+         _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
+      }
+    }
+  }
+  /*The previous DC predictor starts at 0 for all color planes and frame
+     types.*/
+  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
+  /*Fill in the loop filter's bounding value array; the filter is enabled
+     when the initialization returns zero.*/
+  _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state,
+   _pipe->bounding_values);
+  /*Set up any buffers needed for post-processing.*/
+  if(oc_dec_postprocess_init(_dec)){
+    /*We don't have enough information to post-process, so disable it,
+       regardless of the user-requested level, and point the output at the
+       current reference frame.*/
+    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
+    memcpy(_dec->pp_frame_buf,
+     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
+     sizeof(_dec->pp_frame_buf[0])*3);
+  }
+  /*Otherwise save the current post-processing level, to guard against the
+     user changing it from a callback.*/
+  else _pipe->pp_level=_dec->pp_level;
+}
+
+/*Undo the DC prediction in a single plane of an MCU (one or two super block
+   rows).
+  As a side effect, the number of coded and uncoded fragments in this plane of
+   the MCU is also computed.
+  _dec:  The decoder context.
+         The dc value of every coded fragment in the processed rows is
+          incremented by its predictor.
+  _pipe: The pipeline state.
+         fragy0[_pli] and fragy_end[_pli] give the fragment rows to process,
+          pred_last[_pli] is read and updated, and ncoded_fragis[_pli] and
+          nuncoded_fragis[_pli] are written.
+  _pli:  The index of the color plane to process.*/
+static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+  const oc_fragment_plane *fplane;
+  oc_fragment             *frags;
+  int                     *pred_last;
+  ptrdiff_t                ncoded_fragis;
+  ptrdiff_t                fragi;
+  int                      fragx;
+  int                      fragy;
+  int                      fragy0;
+  int                      fragy_end;
+  int                      nhfrags;
+  /*Compute the first and last fragment row of the current MCU for this
+     plane.*/
+  fplane=_dec->state.fplanes+_pli;
+  fragy0=_pipe->fragy0[_pli];
+  fragy_end=_pipe->fragy_end[_pli];
+  nhfrags=fplane->nhfrags;
+  pred_last=_pipe->pred_last[_pli];
+  frags=_dec->state.frags;
+  ncoded_fragis=0;
+  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
+  for(fragy=fragy0;fragy<fragy_end;fragy++){
+    if(fragy==0){
+      /*For the first row, all of the cases reduce to just using the previous
+         predictor for the same reference frame.
+        pred_last[] holds the most recent DC value stored for each reference
+         frame, indexed by OC_FRAME_FOR_MODE() of the fragment's mode.*/
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        if(frags[fragi].coded){
+          int ref;
+          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+          pred_last[ref]=frags[fragi].dc+=pred_last[ref];
+          ncoded_fragis++;
+        }
+      }
+    }
+    else{
+      oc_fragment *u_frags;
+      int          l_ref;
+      int          ul_ref;
+      int          u_ref;
+      u_frags=frags-nhfrags; /*u_frags[i] lies one row above frags[i].*/
+      l_ref=-1; /*-1 means the neighbor is missing or not coded.*/
+      ul_ref=-1;
+      u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1;
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        int ur_ref;
+        if(fragx+1>=nhfrags)ur_ref=-1;
+        else{
+          ur_ref=u_frags[fragi+1].coded?
+           OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
+        }
+        if(frags[fragi].coded){
+          int pred;
+          int ref;
+          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+          /*We break out a separate case based on which of our neighbors use
+             the same reference frames.
+            This is somewhat faster than trying to make a generic case which
+             handles all of them, since it reduces lots of poorly predicted
+             jumps to one switch statement, and also lets a number of the
+             multiplications be optimized out by strength reduction.
+            Bit 0 of the switch value is set when the left neighbor uses the
+             same reference frame as this fragment, bit 1 for the upper-left
+             neighbor, bit 2 for the upper neighbor, and bit 3 for the
+             upper-right neighbor.
+            In cases 7 and 15 the weighted prediction is discarded in favor of
+             the upper, left, or upper-left neighbor's DC value (tested in that
+             order) when it differs from that neighbor by more than 128.*/
+          switch((l_ref==ref)|(ul_ref==ref)<<1|
+           (u_ref==ref)<<2|(ur_ref==ref)<<3){
+            default:pred=pred_last[ref];break;
+            case  1:
+            case  3:pred=frags[fragi-1].dc;break;
+            case  2:pred=u_frags[fragi-1].dc;break;
+            case  4:
+            case  6:
+            case 12:pred=u_frags[fragi].dc;break;
+            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
+            case  8:pred=u_frags[fragi+1].dc;break;
+            case  9:
+            case 11:
+            case 13:{
+              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
+            }break;
+            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
+            case 14:{
+              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
+               +10*u_frags[fragi].dc)/16;
+            }break;
+            case  7:
+            case 15:{
+              int p0;
+              int p1;
+              int p2;
+              p0=frags[fragi-1].dc;
+              p1=u_frags[fragi-1].dc;
+              p2=u_frags[fragi].dc;
+              pred=(29*(p0+p2)-26*p1)/32;
+              if(abs(pred-p2)>128)pred=p2;
+              else if(abs(pred-p0)>128)pred=p0;
+              else if(abs(pred-p1)>128)pred=p1;
+            }break;
+          }
+          pred_last[ref]=frags[fragi].dc+=pred;
+          ncoded_fragis++;
+          l_ref=ref;
+        }
+        else l_ref=-1;
+        ul_ref=u_ref;
+        u_ref=ur_ref;
+      }
+    }
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
+  /*Also save the number of uncoded fragments so we know how many to copy.*/
+  _pipe->nuncoded_fragis[_pli]=
+   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
+}
+
+/*Reconstructs all coded fragments in a single MCU (one or two super block
+   rows).
+  This requires that each coded fragment have a proper macro block mode and
+   motion vector (if not in INTRA mode), and have its DC value decoded, with
+   the DC prediction process reversed, and the number of coded and uncoded
+   fragments in this plane of the MCU be counted.
+  The token lists for each color plane and coefficient should also be filled
+   in, along with initial token offsets, extra bits offsets, and EOB run
+   counts.
+  _dec:  The decoder context.
+  _pipe: The pipeline state.
+         ti[_pli] and eob_runs[_pli] are updated as tokens are consumed,
+          coded_fragis[_pli] is advanced past the fragments reconstructed
+          here, and uncoded_fragis[_pli] is moved back by
+          nuncoded_fragis[_pli] before the uncoded fragments are copied.
+  _pli:  The index of the color plane to process.*/
+static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+  unsigned char       *dct_tokens;
+  const unsigned char *dct_fzig_zag;
+  ogg_uint16_t         dc_quant[2];
+  const oc_fragment   *frags;
+  const ptrdiff_t     *coded_fragis;
+  ptrdiff_t            ncoded_fragis;
+  ptrdiff_t            fragii;
+  ptrdiff_t           *ti;
+  ptrdiff_t           *eob_runs;
+  int                  qti;
+  dct_tokens=_dec->dct_tokens;
+  dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
+  frags=_dec->state.frags;
+  coded_fragis=_pipe->coded_fragis[_pli];
+  ncoded_fragis=_pipe->ncoded_fragis[_pli];
+  ti=_pipe->ti[_pli];
+  eob_runs=_pipe->eob_runs[_pli];
+  for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
+  for(fragii=0;fragii<ncoded_fragis;fragii++){
+    /*This array is made one element larger because the zig-zag index array
+       uses the final element as a dumping ground for out-of-range indices
+       to protect us from buffer overflow.*/
+    OC_ALIGN8(ogg_int16_t dct_coeffs[65]);
+    const ogg_uint16_t *ac_quant;
+    ptrdiff_t           fragi;
+    int                 last_zzi;
+    int                 zzi;
+    fragi=coded_fragis[fragii];
+    for(zzi=0;zzi<64;zzi++)dct_coeffs[zzi]=0;
+    qti=frags[fragi].mb_mode!=OC_MODE_INTRA; /*0 for INTRA, 1 otherwise.*/
+    ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
+    /*Decode the AC coefficients.
+      Whatever this loop leaves in dct_coeffs[0] is replaced by the fragment's
+       DC value after the loop.
+      A pending EOB run at the current zig-zag index ends the block at once;
+       otherwise one token is read from that index's token list and its EOB
+       run, zero run length, and coefficient value are unpacked from the
+       corresponding code word.*/
+    for(zzi=0;zzi<64;){
+      int token;
+      last_zzi=zzi;
+      if(eob_runs[zzi]){
+        eob_runs[zzi]--;
+        break;
+      }
+      else{
+        ptrdiff_t eob;
+        int       cw;
+        int       rlen;
+        int       coeff;
+        int       lti;
+        lti=ti[zzi];
+        token=dct_tokens[lti++];
+        cw=OC_DCT_CODE_WORD[token];
+        /*These parts could be done branchless, but the branches are fairly
+           predictable and the C code translates into more than a few
+           instructions, so it's worth it to avoid them.*/
+        if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+          cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
+        }
+        eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+        if(token==OC_DCT_TOKEN_FAT_EOB){
+          eob+=dct_tokens[lti++]<<8;
+          if(eob==0)eob=OC_DCT_EOB_FINISH;
+        }
+        rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
+        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
+        eob_runs[zzi]=eob;
+        ti[zzi]=lti;
+        zzi+=rlen;
+        dct_coeffs[dct_fzig_zag[zzi]]=(ogg_int16_t)(coeff*(int)ac_quant[zzi]);
+        zzi+=!eob;
+      }
+    }
+    /*TODO: zzi should be exactly 64 here.
+      If it's not, we should report some kind of warning.*/
+    zzi=OC_MINI(zzi,64);
+    dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
+    /*last_zzi is always initialized.
+      If your compiler thinks otherwise, it is dumb.*/
+    oc_state_frag_recon(&_dec->state,fragi,_pli,
+     dct_coeffs,last_zzi,dc_quant[qti]);
+  }
+  _pipe->coded_fragis[_pli]+=ncoded_fragis;
+  /*Right now the reconstructed MCU has only the coded blocks in it.*/
+  /*TODO: We make the decision here to always copy the uncoded blocks into it
+     from the reference frame.
+    We could also copy the coded blocks back over the reference frame, if we
+     wait for an additional MCU to be decoded, which might be faster if only a
+     small number of blocks are coded.
+    However, this introduces more latency, creating a larger cache footprint.
+    It's unknown which decision is better, but this one results in simpler
+     code, and the hard case (high bitrate, high resolution) is handled
+     correctly.*/
+  /*Copy the uncoded blocks from the previous reference frame.*/
+  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
+  oc_state_frag_copy_list(&_dec->state,_pipe->uncoded_fragis[_pli],
+   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
+}
+
+/*Filter a horizontal block edge.
+  Each of the 8 columns is handled on its own: 10 vertically adjacent pixels
+   are read from _src, and 8 pixels (either filtered values or copies of the
+   middle 8 inputs) are stored in _dst.
+  _dst:         Where the first of the 8 output rows is stored.
+  _dst_ystride: The offset between rows of _dst.
+  _src:         The first of the 10 input rows.
+  _src_ystride: The offset between rows of _src.
+  _qstep:       The two pixels adjacent to the edge must differ by less than
+                 this for a column to be filtered.
+  _flimit:      The activity on each side of the edge must be less than this
+                 for a column to be filtered.
+  _variance0:   Accumulates the activity (capped at 255 per column) measured
+                 over the first five input rows.
+  _variance1:   Accumulates the activity (capped at 255 per column) measured
+                 over the last five input rows.*/
+static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
+ int *_variance0,int *_variance1){
+  int col;
+  for(col=0;col<8;col++){
+    const unsigned char *in;
+    unsigned char       *out;
+    int                  p[10];
+    int                  q[8];
+    int                  act0;
+    int                  act1;
+    int                  step;
+    int                  i;
+    /*Gather the column of 10 input pixels that straddles the edge.*/
+    in=_src+col;
+    for(i=0;i<10;i++){
+      p[i]=*in;
+      in+=_src_ystride;
+    }
+    /*Measure the activity on each side of the edge.*/
+    act0=0;
+    act1=0;
+    for(i=0;i<4;i++){
+      act0+=abs(p[i+1]-p[i]);
+      act1+=abs(p[i+5]-p[i+6]);
+    }
+    *_variance0+=OC_MINI(255,act0);
+    *_variance1+=OC_MINI(255,act1);
+    step=p[5]-p[4];
+    if(act0>=_flimit||act1>=_flimit||step>=_qstep||-step>=_qstep){
+      /*Too much activity or too large a step: pass the middle 8 pixels
+         through unchanged.*/
+      for(i=0;i<8;i++)q[i]=p[i+1];
+    }
+    else{
+      /*Smooth across the edge.*/
+      q[0]=p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4>>3;
+      q[1]=p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4>>3;
+      for(i=0;i<4;i++){
+        q[i+2]=p[i]+p[i+1]+p[i+2]+p[i+3]*2+p[i+4]+p[i+5]+p[i+6]+4>>3;
+      }
+      q[6]=p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4>>3;
+      q[7]=p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4>>3;
+    }
+    /*Store the column of 8 output pixels.*/
+    out=_dst+col;
+    for(i=0;i<8;i++){
+      *out=(unsigned char)q[i];
+      out+=_dst_ystride;
+    }
+  }
+}
+
+/*Filter a vertical block edge, in place.
+  For each of 8 rows, the 10 pixels starting one to the left of _dst are
+   read, and the 8 pixels starting at _dst are rewritten with either filtered
+   values or their original contents.
+  _dst:         The first pixel to rewrite in the first row.
+  _dst_ystride: The offset between rows.
+  _qstep:       The two pixels adjacent to the edge must differ by less than
+                 this for a row to be filtered.
+  _flimit:      The activity on each side of the edge must be less than this
+                 for a row to be filtered.
+  _variances:   _variances[0] and _variances[1] accumulate the activity
+                 (capped at 255 per row) measured over the first five and the
+                 last five pixels read, respectively.*/
+static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
+ int _qstep,int _flimit,int *_variances){
+  unsigned char *row;
+  int            rowi;
+  row=_dst;
+  for(rowi=0;rowi<8;rowi++){
+    int p[10];
+    int q[8];
+    int act0;
+    int act1;
+    int step;
+    int i;
+    /*Gather the 10 pixels that straddle the edge in this row.*/
+    for(i=0;i<10;i++)p[i]=row[i-1];
+    /*Measure the activity on each side of the edge.*/
+    act0=0;
+    act1=0;
+    for(i=0;i<4;i++){
+      act0+=abs(p[i+1]-p[i]);
+      act1+=abs(p[i+5]-p[i+6]);
+    }
+    _variances[0]+=OC_MINI(255,act0);
+    _variances[1]+=OC_MINI(255,act1);
+    step=p[5]-p[4];
+    if(act0>=_flimit||act1>=_flimit||step>=_qstep||-step>=_qstep){
+      /*Too much activity or too large a step: keep the pixels as they are.*/
+      for(i=0;i<8;i++)q[i]=p[i+1];
+    }
+    else{
+      /*Smooth across the edge.*/
+      q[0]=p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4>>3;
+      q[1]=p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4>>3;
+      for(i=0;i<4;i++){
+        q[i+2]=p[i]+p[i+1]+p[i+2]+p[i+3]*2+p[i+4]+p[i+5]+p[i+6]+4>>3;
+      }
+      q[6]=p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4>>3;
+      q[7]=p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4>>3;
+    }
+    /*Store the 8 output pixels of this row.*/
+    for(i=0;i<8;i++)row[i]=(unsigned char)q[i];
+    row+=_dst_ystride;
+  }
+}
+/*De-blocks a range of fragment rows in one color plane.
+  Pixels are read from plane _pli of _src and the result is written to plane
+   _pli of _dst; the edge activity measured by the filters is accumulated in
+   _dec->variances.
+  Except at the top of the plane, the pixel rows written start half a fragment
+   (4 rows) below the top of fragment row _fragy0.
+  _dec:       The decoder context.
+  _dst:       The planes of the destination image.
+  _src:       The planes of the source image.
+  _pli:       The index of the color plane to process.
+  _fragy0:    The first fragment row to process.
+  _fragy_end: The end of the fragment row range (exclusive).*/
+static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
+ th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
+ int _fragy_end){
+  oc_fragment_plane   *fplane;
+  int                 *variance;
+  unsigned char       *dc_qi;
+  unsigned char       *dst;
+  const unsigned char *src;
+  ptrdiff_t            froffset;
+  int                  dst_ystride;
+  int                  src_ystride;
+  int                  nhfrags;
+  int                  width;
+  int                  notstart;
+  int                  notdone;
+  int                  flimit;
+  int                  qstep;
+  int                  y_end;
+  int                  y;
+  int                  x;
+  _dst+=_pli;
+  _src+=_pli;
+  fplane=_dec->state.fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
+  variance=_dec->variances+froffset;
+  dc_qi=_dec->dc_qis+froffset;
+  notstart=_fragy0>0;
+  notdone=_fragy_end<fplane->nvfrags;
+  /*We want to clear an extra row of variances, except at the end.
+    When not starting at the top of the plane, the first row of variances in
+     the range is left untouched.*/
+  memset(variance+(nhfrags&-notstart),0,
+   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
+  /*Except for the first time, we want to point to the middle of the row.*/
+  y=(_fragy0<<3)+(notstart<<2);
+  dst_ystride=_dst->stride;
+  src_ystride=_src->stride;
+  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
+  src=_src->data+y*(ptrdiff_t)src_ystride;
+  width=_dst->width;
+  for(;y<4;y++){ /*At the top of the plane, 4 rows are copied unfiltered.*/
+    memcpy(dst,src,width*sizeof(dst[0]));
+    dst+=dst_ystride;
+    src+=src_ystride;
+  }
+  /*We also want to skip the last row in the frame for this loop.
+    Each pass filters the horizontal edge that lies 4 pixel rows below dst for
+     every 8 pixel wide column, and, for every column but the first, the
+     vertical edge at the left of that column in the 8 pixel rows starting 4
+     rows above dst.
+    The filter strength of each column comes from pp_dc_scale, indexed by the
+     corresponding dc_qis entry.*/
+  y_end=_fragy_end-!notdone<<3;
+  for(;y<y_end;y+=8){
+    qstep=_dec->pp_dc_scale[*dc_qi];
+    flimit=(qstep*3)>>2;
+    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
+     qstep,flimit,variance,variance+nhfrags);
+    variance++;
+    dc_qi++;
+    for(x=8;x<width;x+=8){
+      qstep=_dec->pp_dc_scale[*dc_qi];
+      flimit=(qstep*3)>>2;
+      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
+       qstep,flimit,variance,variance+nhfrags);
+      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
+       qstep,flimit,variance-1);
+      variance++;
+      dc_qi++;
+    }
+    dst+=dst_ystride<<3;
+    src+=src_ystride<<3;
+  }
+  /*And finally, handle the last row in the frame, if it's in the range.
+    The remaining pixel rows, down to the plane height, are copied
+     unfiltered first.*/
+  if(!notdone){
+    int height;
+    height=_dst->height;
+    for(;y<height;y++){
+      memcpy(dst,src,width*sizeof(dst[0]));
+      dst+=dst_ystride;
+      src+=src_ystride;
+    }
+    /*Filter the last row of vertical block edges.*/
+    dc_qi++;
+    for(x=8;x<width;x+=8){
+      qstep=_dec->pp_dc_scale[*dc_qi++];
+      flimit=(qstep*3)>>2;
+      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
+       qstep,flimit,variance++);
+    }
+  }
+}
+/*Applies the de-ringing filter to a single 8x8 block, in place.
+  Each pixel is replaced by a weighted sum of itself and its four neighbors
+   (the weights total 128), where the weight given to a neighbor shrinks as
+   the difference across the corresponding pixel boundary grows.
+  All of the weights are computed from the unfiltered pixels before any pixel
+   is changed, but because the pixels are then rewritten in place, the
+   neighbors above and to the left that lie inside the block have already
+   been filtered by the time they are read.
+  _idata:     The upper-left pixel of the block.
+  _ystride:   The offset between rows of the image.
+  _b:         Boundary flags: bits 0, 1, 2, and 3 are set if the pixels to
+               the left, right, top, and bottom of the block, respectively,
+               must not be used.
+              On a flagged side the block's own edge pixels stand in for the
+               missing neighbors.
+  _dc_scale:  Added to the base weight of 32; three times this value also
+               limits the largest weight.
+  _sharp_mod: The weight to use when the computed one falls below -64.
+  _strong:    0 or 1; selects the cap on the weights (24 or 32) and whether
+               pixel differences are doubled (they are for 0, not for 1).*/
+static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
+ int _dc_scale,int _sharp_mod,int _strong){
+  static const unsigned char MOD_MAX[2]={24,32};
+  static const unsigned char MOD_SHIFT[2]={1,0};
+  const unsigned char *psrc;
+  const unsigned char *src;
+  const unsigned char *nsrc;
+  unsigned char       *dst;
+  int                  vmod[72]; /*Weights of vertical neighbors.*/
+  int                  hmod[72]; /*Weights of horizontal neighbors.*/
+  int                  mod_hi;
+  int                  by;
+  int                  bx;
+  mod_hi=OC_MINI(3*_dc_scale,MOD_MAX[_strong]);
+  dst=_idata;
+  src=dst;
+  psrc=src-(_ystride&-!(_b&4));
+  for(by=0;by<9;by++){
+    for(bx=0;bx<8;bx++){
+      int mod;
+      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<MOD_SHIFT[_strong]);
+      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
+    }
+    psrc=src;
+    src+=_ystride&-(!(_b&8)|by<7);
+  }
+  nsrc=dst;
+  psrc=dst-!(_b&1);
+  for(bx=0;bx<9;bx++){
+    src=nsrc;
+    for(by=0;by<8;by++){
+      int mod;
+      mod=32+_dc_scale-(abs(*src-*psrc)<<MOD_SHIFT[_strong]);
+      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
+      psrc+=_ystride;
+      src+=_ystride;
+    }
+    psrc=nsrc;
+    nsrc+=!(_b&2)|bx<7;
+  }
+  src=dst;
+  psrc=src-(_ystride&-!(_b&4));
+  nsrc=src+_ystride;
+  for(by=0;by<8;by++){
+    int a;
+    int b;
+    int w;
+    a=128;
+    b=64;
+    w=hmod[by];
+    a-=w;
+    b+=w**(src-!(_b&1));
+    w=vmod[by<<3];
+    a-=w;
+    b+=w*psrc[0];
+    w=vmod[by+1<<3];
+    a-=w;
+    b+=w*nsrc[0];
+    w=hmod[(1<<3)+by];
+    a-=w;
+    b+=w*src[1];
+    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
+    for(bx=1;bx<7;bx++){
+      a=128;
+      b=64;
+      w=hmod[(bx<<3)+by];
+      a-=w;
+      b+=w*src[bx-1];
+      w=vmod[(by<<3)+bx];
+      a-=w;
+      b+=w*psrc[bx];
+      w=vmod[(by+1<<3)+bx];
+      a-=w;
+      b+=w*nsrc[bx];
+      w=hmod[(bx+1<<3)+by];
+      a-=w;
+      b+=w*src[bx+1];
+      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
+    }
+    a=128;
+    b=64;
+    w=hmod[(7<<3)+by];
+    a-=w;
+    b+=w*src[6];
+    w=vmod[(by<<3)+7];
+    a-=w;
+    b+=w*psrc[7];
+    w=vmod[(by+1<<3)+7];
+    a-=w;
+    b+=w*nsrc[7];
+    w=hmod[(8<<3)+by];
+    a-=w;
+    b+=w*src[7+!(_b&2)];
+    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
+    dst+=_ystride;
+    psrc=src;
+    src=nsrc;
+    nsrc+=_ystride&-(!(_b&8)|by<6);
+  }
+}
+/*Variance thresholds used by oc_dec_dering_frag_rows() to choose how each
+   block is de-ringed.*/
+#define OC_DERING_THRESH1 (384)
+#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
+#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
+#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
+/*De-rings a range of fragment rows in one color plane of _img, in place.
+  Each 8x8 block is handled according to its entry in _dec->variances:
+  - If strong de-ringing is enabled for this plane by _dec->pp_level and the
+     variance exceeds OC_DERING_THRESH3 (luma) or OC_DERING_THRESH4 (chroma),
+     the strong filter is applied once, and then twice more if this is a
+     chroma plane or any neighboring block inside the plane has a variance
+     above OC_DERING_THRESH4.
+  - Otherwise, if the variance exceeds OC_DERING_THRESH2, the strong filter is
+     applied once.
+  - Otherwise, if the variance exceeds OC_DERING_THRESH1, the weak filter is
+     applied once.
+  - Otherwise the block is left alone.
+  Blocks on the boundary of the plane are flagged (left, right, top, and
+   bottom in bits 0 to 3 of b) so that neither pixels nor variances outside
+   the plane are examined.
+  _dec:       The decoder context.
+  _img:       The planes of the image to filter.
+  _pli:       The index of the color plane to process.
+  _fragy0:    The first fragment row to process.
+  _fragy_end: The end of the fragment row range (exclusive).*/
+static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
+ int _pli,int _fragy0,int _fragy_end){
+  th_img_plane      *iplane;
+  oc_fragment_plane *fplane;
+  oc_fragment       *frag;
+  int               *variance;
+  unsigned char     *idata;
+  ptrdiff_t          froffset;
+  int                ystride;
+  int                nhfrags;
+  int                sthresh;
+  int                strong;
+  int                y_end;
+  int                width;
+  int                height;
+  int                y;
+  int                x;
+  iplane=_img+_pli;
+  fplane=_dec->state.fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
+  variance=_dec->variances+froffset;
+  frag=_dec->state.frags+froffset;
+  strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
+  sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
+  y=_fragy0<<3;
+  ystride=iplane->stride;
+  idata=iplane->data+y*(ptrdiff_t)ystride;
+  y_end=_fragy_end<<3;
+  width=iplane->width;
+  height=iplane->height;
+  for(;y<y_end;y+=8){
+    for(x=0;x<width;x+=8){
+      int b;
+      int qi;
+      int var;
+      qi=_dec->state.qis[frag->qii];
+      var=*variance;
+      b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
+      if(strong&&var>sthresh){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+        if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
+         !(b&2)&&variance[1]>OC_DERING_THRESH4||
+         !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
+         !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
+          oc_dering_block(idata+x,ystride,b,
+           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+          oc_dering_block(idata+x,ystride,b,
+           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+        }
+      }
+      else if(var>OC_DERING_THRESH2){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+      }
+      else if(var>OC_DERING_THRESH1){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
+      }
+      frag++;
+      variance++;
+    }
+    idata+=ystride<<3;
+  }
+}
+
+
+
+/*Allocates and initializes a decoder context.
+  _info:  The stream parameters to decode with.
+  _setup: The setup information to initialize the decoder with.
+  Return: The new decoder context, or NULL if either argument is NULL, the
+           allocation fails, or oc_dec_init() reports an error.*/
+th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
+  oc_dec_ctx *ctx;
+  if(_info==NULL||_setup==NULL)return NULL;
+  ctx=_ogg_malloc(sizeof(*ctx));
+  if(ctx!=NULL&&oc_dec_init(ctx,_info,_setup)>=0){
+    /*Frame numbering starts from zero.*/
+    ctx->state.curframe_num=0;
+    return ctx;
+  }
+  /*Either the allocation or the initialization failed.*/
+  _ogg_free(ctx);
+  return NULL;
+}
+
+/*Clears and releases a decoder context.
+  _dec: The decoder context to free; NULL is ignored.*/
+void th_decode_free(th_dec_ctx *_dec){
+  if(_dec==NULL)return;
+  oc_dec_clear(_dec);
+  _ogg_free(_dec);
+}
+
+/*Decoder control function.
+  _dec:    The decoder context.
+  _req:    The control request to perform (one of the TH_DECCTL_* codes).
+  _buf:    The request's parameter buffer.
+  _buf_sz: The size of the parameter buffer, in bytes; it must exactly match
+            the size of the type the request expects.
+  Return: 0 on success, TH_EFAULT if _dec or _buf is NULL, TH_EINVAL if
+           _buf_sz or the supplied value is not acceptable, or TH_EIMPL if the
+           request is not recognized.
+          An unrecognized request returns TH_EIMPL even when _dec or _buf is
+           NULL.*/
+int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
+ size_t _buf_sz){
+  switch(_req){
+  /*Reports the largest supported post-processing level as an int.*/
+  case TH_DECCTL_GET_PPLEVEL_MAX:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    (*(int *)_buf)=OC_PP_LEVEL_MAX;
+    return 0;
+  }break;
+  /*Sets the post-processing level from an int in 0...OC_PP_LEVEL_MAX.*/
+  case TH_DECCTL_SET_PPLEVEL:{
+    int pp_level;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    pp_level=*(int *)_buf;
+    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
+    _dec->pp_level=pp_level;
+    return 0;
+  }break;
+  /*Sets the current granule position from a non-negative ogg_int64_t, and
+     derives the key frame and current frame numbers from it.*/
+  case TH_DECCTL_SET_GRANPOS:{
+    ogg_int64_t granpos;
+    ogg_int64_t offset_mask;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
+    granpos=*(ogg_int64_t *)_buf;
+    if(granpos<0)return TH_EINVAL;
+    _dec->state.granpos=granpos;
+    _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
+     -_dec->state.granpos_bias;
+    /*Build the mask in 64 bits: shifting a plain int by 31 would overflow,
+       which is undefined behavior.*/
+    offset_mask=((ogg_int64_t)1<<_dec->state.info.keyframe_granule_shift)-1;
+    _dec->state.curframe_num=_dec->state.keyframe_num+(granpos&offset_mask);
+    return 0;
+  }break;
+  /*Installs the striped decode callback from a th_stripe_callback.*/
+  case TH_DECCTL_SET_STRIPE_CB:{
+    th_stripe_callback *cb;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
+    cb=(th_stripe_callback *)_buf;
+    _dec->stripe_cb.ctx=cb->ctx;
+    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
+    return 0;
+  }break;
+#ifdef HAVE_CAIRO
+  /*Each telemetry request turns telemetry on and stores the supplied int in
+     the matching telemetry setting.*/
+  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_mbmode=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_MV:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_mv=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_QI:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_qi=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_BITS:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_bits=*(int *)_buf;
+    return 0;
+  }break;
+#endif
+  default:return TH_EIMPL;
+  }
+}
+/*Decodes one packet of video data.
+  A zero-byte packet only advances the granule position; any other packet has
+   its frame header and per-block data unpacked, and is then reconstructed,
+   loop filtered, bordered, and post-processed one MCU at a time.
+  _dec:     The decoder context.
+  _op:      The packet to decode.
+  _granpos: If not NULL, receives the granule position of the frame.
+  Return: 0 when a frame was decoded, TH_DUPFRAME for a zero-byte packet,
+           TH_EFAULT if _dec or _op is NULL, or the negative value returned
+           by oc_dec_frame_header_unpack() if it fails.*/
+int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
+ ogg_int64_t *_granpos){
+  int ret;
+  if(_dec==NULL||_op==NULL)return TH_EFAULT;
+  /*A completely empty packet indicates a dropped frame and is treated exactly
+     like an inter frame with no coded blocks.
+    Only proceed if we have a non-empty packet.*/
+  if(_op->bytes!=0){
+    oc_dec_pipeline_state pipe;
+    th_ycbcr_buffer       stripe_buf;
+    int                   stripe_fragy;
+    int                   refi;
+    int                   pli;
+    int                   notstart;
+    int                   notdone;
+    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
+#if defined(HAVE_CAIRO)
+    _dec->telemetry_frame_bytes=_op->bytes;
+#endif
+    ret=oc_dec_frame_header_unpack(_dec);
+    if(ret<0)return ret;
+    /*Select a free buffer to use for the reconstructed version of this
+       frame.
+      In the normal case this is the lowest buffer index that is used by
+       neither the golden nor the previous reference frame.*/
+    if(_dec->state.frame_type!=OC_INTRA_FRAME&&
+     (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
+     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
+      th_info *info;
+      size_t       yplane_sz;
+      size_t       cplane_sz;
+      int          yhstride;
+      int          yheight;
+      int          chstride;
+      int          cheight;
+      /*We're decoding an INTER frame, but have no initialized reference
+         buffers (i.e., decoding did not start on a key frame).
+        We initialize them to a solid gray here.*/
+      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
+      _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
+      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi=1;
+      info=&_dec->state.info;
+      yhstride=info->frame_width+2*OC_UMV_PADDING;
+      yheight=info->frame_height+2*OC_UMV_PADDING;
+      chstride=yhstride>>!(info->pixel_fmt&1);
+      cheight=yheight>>!(info->pixel_fmt&2);
+      yplane_sz=yhstride*(size_t)yheight;
+      cplane_sz=chstride*(size_t)cheight;
+      memset(_dec->state.ref_frame_data[0],0x80,yplane_sz+2*cplane_sz);
+    }
+    else{
+      for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
+       refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
+      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+    }
+    if(_dec->state.frame_type==OC_INTRA_FRAME){
+      oc_dec_mark_all_intra(_dec);
+      _dec->state.keyframe_num=_dec->state.curframe_num;
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_coding_bytes=
+       _dec->telemetry_mode_bytes=
+       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    }
+    else{
+      oc_dec_coded_flags_unpack(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+      oc_dec_mb_modes_unpack(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    }
+    oc_dec_block_qis_unpack(_dec);
+#if defined(HAVE_CAIRO)
+    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    oc_dec_residual_tokens_unpack(_dec);
+    /*Update granule position.
+      This must be done before the striped decode callbacks so that the
+       application knows what to do with the frame data.
+      The upper part holds the key frame number plus granpos_bias, and the
+       lower keyframe_granule_shift bits hold the number of frames since that
+       key frame.*/
+    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
+     _dec->state.info.keyframe_granule_shift)
+     +(_dec->state.curframe_num-_dec->state.keyframe_num);
+    _dec->state.curframe_num++;
+    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+    /*All of the rest of the operations -- DC prediction reversal,
+       reconstructing coded fragments, copying uncoded fragments, loop
+       filtering, extending borders, and out-of-loop post-processing -- should
+       be pipelined.
+      I.e., DC prediction reversal, reconstruction, and uncoded fragment
+       copying are done for one or two super block rows, then loop filtering is
+       run as far as it can, then bordering copying, then post-processing.
+      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
+       block rows, and one chroma.
+      Otherwise, an MCU consists of one super block row from each plane.
+      Inside each MCU, we perform all of the steps on one color plane before
+       moving on to the next.
+      After reconstruction, the additional filtering stages introduce a delay
+       since they need some pixels from the next fragment row.
+      Thus the actual number of decoded rows available is slightly smaller for
+       the first MCU, and slightly larger for the last.
+
+      This entire process allows us to operate on the data while it is still in
+       cache, resulting in big performance improvements.
+      An application callback allows further application processing (blitting
+       to video memory, color conversion, etc.) to also use the data while it's
+       in cache.*/
+    oc_dec_pipeline_init(_dec,&pipe);
+    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
+    notstart=0;
+    notdone=1;
+    for(stripe_fragy=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){
+      int avail_fragy0;
+      int avail_fragy_end;
+      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
+      notdone=stripe_fragy+pipe.mcu_nvfrags<avail_fragy_end;
+      for(pli=0;pli<3;pli++){
+        oc_fragment_plane *fplane;
+        int                frag_shift;
+        int                pp_offset;
+        int                sdelay;
+        int                edelay;
+        fplane=_dec->state.fplanes+pli;
+        /*Compute the first and last fragment row of the current MCU for this
+           plane.
+          frag_shift is 1 for a chroma plane when bit 1 of pixel_fmt is clear,
+           and 0 otherwise.*/
+        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
+        pipe.fragy0[pli]=stripe_fragy>>frag_shift;
+        pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
+         pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
+        oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
+        oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
+        sdelay=edelay=0;
+        if(pipe.loop_filter){
+          sdelay+=notstart;
+          edelay+=notdone;
+          oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
+           refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+        }
+        /*To fill the borders, we have an additional two pixel delay, since a
+           fragment in the next row could filter its top edge, using two pixels
+           from a fragment in this row.
+          But there's no reason to delay a full fragment between the two.*/
+        oc_state_borders_fill_rows(&_dec->state,refi,pli,
+         (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
+         (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
+        /*Out-of-loop post-processing.
+          The chroma planes compare against post-processing levels three
+           higher than those used for luma.*/
+        pp_offset=3*(pli!=0);
+        if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
+          /*Perform de-blocking in one plane.*/
+          sdelay+=notstart;
+          edelay+=notdone;
+          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
+           _dec->state.ref_frame_bufs[refi],pli,
+           pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+          if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
+            /*Perform de-ringing in one plane.*/
+            sdelay+=notstart;
+            edelay+=notdone;
+            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
+             pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+          }
+        }
+        /*If no post-processing is done, we still need to delay a row for the
+           loop filter, thanks to the strange filtering order VP3 chose.*/
+        else if(pipe.loop_filter){
+          sdelay+=notstart;
+          edelay+=notdone;
+        }
+        /*Compute the intersection of the available rows in all planes.
+          If chroma is sub-sampled, the effect of each of its delays is
+           doubled, but luma might have more post-processing filters enabled
+           than chroma, so we don't know up front which one is the limiting
+           factor.*/
+        avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<<frag_shift);
+        avail_fragy_end=OC_MINI(avail_fragy_end,
+         pipe.fragy_end[pli]-edelay<<frag_shift);
+      }
+      if(_dec->stripe_cb.stripe_decoded!=NULL){
+        /*The callback might want to use the FPU, so let's make sure they can.
+          We violate all kinds of ABI restrictions by not doing this until
+           now, but none of them actually matter since we don't use floating
+           point ourselves.*/
+        oc_restore_fpu(&_dec->state);
+        /*Make the callback, ensuring we flip the sense of the "start" and
+           "end" of the available region upside down.*/
+        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
+         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
+         _dec->state.fplanes[0].nvfrags-avail_fragy0);
+      }
+      notstart=1;
+    }
+    /*Finish filling in the reference frame borders.*/
+    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
+    /*Update the reference frame indices.*/
+    if(_dec->state.frame_type==OC_INTRA_FRAME){
+      /*The new frame becomes both the previous and gold reference frames.*/
+      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
+       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
+       _dec->state.ref_frame_idx[OC_FRAME_SELF];
+    }
+    else{
+      /*Otherwise, just replace the previous reference frame.*/
+      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
+       _dec->state.ref_frame_idx[OC_FRAME_SELF];
+    }
+    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
+       gamma values, if nothing else).*/
+    oc_restore_fpu(&_dec->state);
+#if defined(OC_DUMP_IMAGES)
+    /*Don't dump images for dropped frames.*/
+    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
+#endif
+    return 0;
+  }
+  else{
+    /*Just update the granule position and return.*/
+    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
+     _dec->state.info.keyframe_granule_shift)
+     +(_dec->state.curframe_num-_dec->state.keyframe_num);
+    _dec->state.curframe_num++;
+    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+    return TH_DUPFRAME;
+  }
+}
+
+#if defined(HAVE_CAIRO)
+/*Draws one motion vector of the telemetry overlay as a tapered white line.
+  The line starts at the tip of the vector, (_x+_mv[0],_y-_mv[1]), with a
+   width of 3, narrows to a width of 2 and then 1 at two thirds and one third
+   of the vector's length, and ends at (_x,_y).
+  This replaces the current source color and leaves the line width set to 1.
+  _c:  The Cairo context to draw into.
+  _x:  The horizontal position of the base of the vector.
+  _y:  The vertical position of the base of the vector.
+  _mv: The two components of the motion vector.*/
+static void oc_dec_telemetry_draw_mv(cairo_t *_c,double _x,double _y,
+ const signed char *_mv){
+  cairo_move_to(_c,_x+_mv[0],_y-_mv[1]);
+  cairo_set_source_rgba(_c,1.,1.,1.,.9);
+  cairo_set_line_width(_c,3.);
+  cairo_line_to(_c,_x+_mv[0]*.66,_y-_mv[1]*.66);
+  /*Each stroke keeps the path, so the narrower strokes below re-draw the
+     segments already laid down as well as the new one.*/
+  cairo_stroke_preserve(_c);
+  cairo_set_line_width(_c,2.);
+  cairo_line_to(_c,_x+_mv[0]*.33,_y-_mv[1]*.33);
+  cairo_stroke_preserve(_c);
+  cairo_set_line_width(_c,1.);
+  cairo_line_to(_c,_x,_y);
+  cairo_stroke(_c);
+}
+#endif
+
+/*Fills in _ycbcr with the decoder's current output frame.
+  The buffer is filled in from the decoder's post-processing frame buffer by
+   oc_ycbcr_buffer_flip().
+  When built with HAVE_CAIRO and telemetry is enabled on the decoder, the frame
+   is additionally converted to RGB, annotated with the requested overlays
+   (macroblock modes, motion vectors, per-block qi changes, bit usage bars and
+   the frame qi values), converted back to Y'CbCr into a decoder-owned buffer,
+   and _ycbcr is re-pointed at that buffer.
+  _dec:   The decoder context.
+  _ycbcr: The buffer description to fill in.
+  Return: 0 on success, or TH_EFAULT if _dec or _ycbcr is NULL.
+          If the telemetry buffers cannot be allocated, 0 is still returned and
+           _ycbcr describes the un-annotated frame.*/
+int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
+  if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
+  oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
+#if defined(HAVE_CAIRO)
+  /*If telemetry ioctls are active, we need to draw to the output buffer.
+    Stuff the plane into cairo.*/
+  if(_dec->telemetry){
+    cairo_surface_t *cs;
+    unsigned char   *data;
+    unsigned char   *y_row;
+    unsigned char   *u_row;
+    unsigned char   *v_row;
+    unsigned char   *rgb_row;
+    int              cstride;
+    int              w;
+    int              h;
+    int              x;
+    int              y;
+    int              hdec;
+    int              vdec;
+    w=_ycbcr[0].width;
+    h=_ycbcr[0].height;
+    /*hdec and vdec are the shifts applied to luma coordinates to get chroma
+       coordinates in the horizontal and vertical directions.*/
+    hdec=!(_dec->state.info.pixel_fmt&1);
+    vdec=!(_dec->state.info.pixel_fmt&2);
+    /*Lazy data buffer init.
+      We could try to re-use the post-processing buffer, which would save
+       memory, but complicate the allocation logic there.
+      I don't think anyone cares about memory usage when using telemetry; it is
+       not meant for embedded devices.*/
+    if(_dec->telemetry_frame_data==NULL){
+      _dec->telemetry_frame_data=_ogg_malloc(
+       (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
+      if(_dec->telemetry_frame_data==NULL)return 0;
+    }
+    cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
+    /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
+    data=cairo_image_surface_get_data(cs);
+    if(data==NULL){
+      cairo_surface_destroy(cs);
+      return 0;
+    }
+    cstride=cairo_image_surface_get_stride(cs);
+    y_row=_ycbcr[0].data;
+    u_row=_ycbcr[1].data;
+    v_row=_ycbcr[2].data;
+    rgb_row=data;
+    for(y=0;y<h;y++){
+      for(x=0;x<w;x++){
+        int r;
+        int g;
+        int b;
+        r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
+        g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
+         -2672387*v_row[x>>hdec]+447306710)/3287200;
+        b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
+        /*Each RGB24 pixel occupies 4 bytes; the fourth is left untouched.*/
+        rgb_row[4*x+0]=OC_CLAMP255(b);
+        rgb_row[4*x+1]=OC_CLAMP255(g);
+        rgb_row[4*x+2]=OC_CLAMP255(r);
+      }
+      y_row+=_ycbcr[0].stride;
+      /*Only advance the chroma rows after odd luma rows when the chroma
+         planes are vertically decimated.*/
+      u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
+      v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
+      rgb_row+=cstride;
+    }
+    /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
+    {
+      cairo_t           *c;
+      const oc_fragment *frags;
+      oc_mv             *frag_mvs;
+      const signed char *mb_modes;
+      oc_mb_map         *mb_maps;
+      size_t             nmbs;
+      size_t             mbi;
+      int                row2;
+      int                col2;
+      /*Maps each qi index used by a block to the glyph drawn for it: 0 for
+         none, +/-1 for a plus/minus, and +/-2 for a double plus/minus,
+         depending on how that qi compares to the first qi of the frame.*/
+      int                qim[3]={0,0,0};
+      if(_dec->state.nqis==2){
+        int bqi;
+        bqi=_dec->state.qis[0];
+        if(_dec->state.qis[1]>bqi)qim[1]=1;
+        if(_dec->state.qis[1]<bqi)qim[1]=-1;
+      }
+      if(_dec->state.nqis==3){
+        int bqi;
+        int cqi;
+        int dqi;
+        bqi=_dec->state.qis[0];
+        cqi=_dec->state.qis[1];
+        dqi=_dec->state.qis[2];
+        if(cqi>bqi&&dqi>bqi){
+          if(dqi>cqi){
+            qim[1]=1;
+            qim[2]=2;
+          }
+          else{
+            qim[1]=2;
+            qim[2]=1;
+          }
+        }
+        else if(cqi<bqi&&dqi<bqi){
+          if(dqi<cqi){
+            qim[1]=-1;
+            qim[2]=-2;
+          }
+          else{
+            qim[1]=-2;
+            qim[2]=-1;
+          }
+        }
+        else{
+          if(cqi<bqi)qim[1]=-1;
+          else qim[1]=1;
+          if(dqi<bqi)qim[2]=-1;
+          else qim[2]=1;
+        }
+      }
+      c=cairo_create(cs);
+      frags=_dec->state.frags;
+      frag_mvs=_dec->state.frag_mvs;
+      mb_modes=_dec->state.mb_modes;
+      mb_maps=_dec->state.mb_maps;
+      nmbs=_dec->state.nmbs;
+      row2=0;
+      col2=0;
+      for(mbi=0;mbi<nmbs;mbi++){
+        float x;
+        float y;
+        int   bi;
+        /*The top-left corner of this macroblock in the Cairo surface.*/
+        y=h-(row2+((col2+1>>1)&1))*16-16;
+        x=(col2>>1)*16;
+        cairo_set_line_width(c,1.);
+        /*Keyframe (all intra) red box.*/
+        if(_dec->state.frame_type==OC_INTRA_FRAME){
+          if(_dec->telemetry_mbmode&0x02){
+            cairo_set_source_rgba(c,1.,0,0,.5);
+            cairo_rectangle(c,x+2.5,y+2.5,11,11);
+            cairo_stroke_preserve(c);
+            cairo_set_source_rgba(c,1.,0,0,.25);
+            cairo_fill(c);
+          }
+        }
+        else{
+          const signed char *frag_mv;
+          ptrdiff_t          fragi;
+          /*Find the first coded luma block; its motion vector is the one
+             drawn for every mode except 4MV.*/
+          for(bi=0;bi<4;bi++){
+            fragi=mb_maps[mbi][0][bi];
+            if(fragi>=0&&frags[fragi].coded){
+              frag_mv=frag_mvs[fragi];
+              break;
+            }
+          }
+          /*Macroblocks with no coded luma blocks are not annotated.*/
+          if(bi<4){
+            switch(mb_modes[mbi]){
+              case OC_MODE_INTRA:{
+                if(_dec->telemetry_mbmode&0x02){
+                  cairo_set_source_rgba(c,1.,0,0,.5);
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,1.,0,0,.25);
+                  cairo_fill(c);
+                }
+              }break;
+              case OC_MODE_INTER_NOMV:{
+                if(_dec->telemetry_mbmode&0x01){
+                  cairo_set_source_rgba(c,0,0,1.,.5);
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,0,0,1.,.25);
+                  cairo_fill(c);
+                }
+              }break;
+              case OC_MODE_INTER_MV:{
+                if(_dec->telemetry_mbmode&0x04){
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x04){
+                  oc_dec_telemetry_draw_mv(c,x+8,y+8,frag_mv);
+                }
+              }break;
+              case OC_MODE_INTER_MV_LAST:{
+                if(_dec->telemetry_mbmode&0x08){
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_move_to(c,x+13.5,y+2.5);
+                  cairo_line_to(c,x+2.5,y+8);
+                  cairo_line_to(c,x+13.5,y+13.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x08){
+                  oc_dec_telemetry_draw_mv(c,x+8,y+8,frag_mv);
+                }
+              }break;
+              case OC_MODE_INTER_MV_LAST2:{
+                if(_dec->telemetry_mbmode&0x10){
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_move_to(c,x+8,y+2.5);
+                  cairo_line_to(c,x+2.5,y+8);
+                  cairo_line_to(c,x+8,y+13.5);
+                  cairo_move_to(c,x+13.5,y+2.5);
+                  cairo_line_to(c,x+8,y+8);
+                  cairo_line_to(c,x+13.5,y+13.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x10){
+                  oc_dec_telemetry_draw_mv(c,x+8,y+8,frag_mv);
+                }
+              }break;
+              case OC_MODE_GOLDEN_NOMV:{
+                if(_dec->telemetry_mbmode&0x20){
+                  cairo_set_source_rgba(c,1.,1.,0,.5);
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,1.,1.,0,.25);
+                  cairo_fill(c);
+                }
+              }break;
+              case OC_MODE_GOLDEN_MV:{
+                if(_dec->telemetry_mbmode&0x40){
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_set_source_rgba(c,1.,1.,0,.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x40){
+                  oc_dec_telemetry_draw_mv(c,x+8,y+8,frag_mv);
+                }
+              }break;
+              case OC_MODE_INTER_MV_FOUR:{
+                if(_dec->telemetry_mbmode&0x80){
+                  cairo_rectangle(c,x+2.5,y+2.5,4,4);
+                  cairo_rectangle(c,x+9.5,y+2.5,4,4);
+                  cairo_rectangle(c,x+2.5,y+9.5,4,4);
+                  cairo_rectangle(c,x+9.5,y+9.5,4,4);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_stroke(c);
+                }
+                /*4mv is odd, coded in raster order.
+                  Each coded luma block gets its own vector, based at the
+                   center of that block: blocks 0 and 1 are on the lower row
+                   of the macroblock, blocks 2 and 3 on the upper row.*/
+                if(_dec->telemetry_mv&0x80){
+                  for(bi=0;bi<4;bi++){
+                    fragi=mb_maps[mbi][0][bi];
+                    /*Skip blocks with no fragment, as the other walks over
+                       mb_maps in this function do.*/
+                    if(fragi>=0&&frags[fragi].coded){
+                      oc_dec_telemetry_draw_mv(c,
+                       x+4+(bi&1)*8,y+12-(bi&2)*4,frag_mvs[fragi]);
+                    }
+                  }
+                }
+              }break;
+            }
+          }
+        }
+        /*qii illustration.*/
+        if(_dec->telemetry_qi&0x2){
+          cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
+          for(bi=0;bi<4;bi++){
+            ptrdiff_t fragi;
+            int       qiv;
+            int       xp;
+            int       yp;
+            xp=x+(bi&1)*8;
+            yp=y+8-(bi&2)*4;
+            fragi=mb_maps[mbi][0][bi];
+            if(fragi>=0&&frags[fragi].coded){
+              qiv=qim[frags[fragi].qii];
+              /*Each glyph is stroked twice: first as a wide translucent black
+                 outline, then (after the switch) as a thin colored line.*/
+              cairo_set_line_width(c,3.);
+              cairo_set_source_rgba(c,0.,0.,0.,.5);
+              switch(qiv){
+                /*Double plus:*/
+                case 2:{
+                  if((bi&1)^((bi&2)>>1)){
+                    cairo_move_to(c,xp+2.5,yp+1.5);
+                    cairo_line_to(c,xp+2.5,yp+3.5);
+                    cairo_move_to(c,xp+1.5,yp+2.5);
+                    cairo_line_to(c,xp+3.5,yp+2.5);
+                    cairo_move_to(c,xp+5.5,yp+4.5);
+                    cairo_line_to(c,xp+5.5,yp+6.5);
+                    cairo_move_to(c,xp+4.5,yp+5.5);
+                    cairo_line_to(c,xp+6.5,yp+5.5);
+                    cairo_stroke_preserve(c);
+                    cairo_set_source_rgba(c,0.,1.,1.,1.);
+                  }
+                  else{
+                    cairo_move_to(c,xp+5.5,yp+1.5);
+                    cairo_line_to(c,xp+5.5,yp+3.5);
+                    cairo_move_to(c,xp+4.5,yp+2.5);
+                    cairo_line_to(c,xp+6.5,yp+2.5);
+                    cairo_move_to(c,xp+2.5,yp+4.5);
+                    cairo_line_to(c,xp+2.5,yp+6.5);
+                    cairo_move_to(c,xp+1.5,yp+5.5);
+                    cairo_line_to(c,xp+3.5,yp+5.5);
+                    cairo_stroke_preserve(c);
+                    cairo_set_source_rgba(c,0.,1.,1.,1.);
+                  }
+                }break;
+                /*Double minus:*/
+                case -2:{
+                  cairo_move_to(c,xp+2.5,yp+2.5);
+                  cairo_line_to(c,xp+5.5,yp+2.5);
+                  cairo_move_to(c,xp+2.5,yp+5.5);
+                  cairo_line_to(c,xp+5.5,yp+5.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,1.,1.,1.,1.);
+                }break;
+                /*Plus:*/
+                case 1:{
+                  /*The parentheses matter: == binds tighter than &, so
+                     without them these tests would always be false.*/
+                  if((bi&2)==0)yp-=2;
+                  if((bi&1)==0)xp-=2;
+                  cairo_move_to(c,xp+4.5,yp+2.5);
+                  cairo_line_to(c,xp+4.5,yp+6.5);
+                  cairo_move_to(c,xp+2.5,yp+4.5);
+                  cairo_line_to(c,xp+6.5,yp+4.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,.1,1.,.3,1.);
+                }break;
+                /*Minus:*/
+                case -1:{
+                  cairo_move_to(c,xp+2.5,yp+4.5);
+                  cairo_line_to(c,xp+6.5,yp+4.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,1.,.3,.1,1.);
+                }break;
+                /*No glyph for this block; move on to the next one.*/
+                default:continue;
+              }
+              cairo_set_line_width(c,1.);
+              cairo_stroke(c);
+            }
+          }
+        }
+        col2++;
+        if((col2>>1)>=_dec->state.nhmbs){
+          col2=0;
+          row2+=2;
+        }
+      }
+      /*Bit usage indicator[s]:*/
+      if(_dec->telemetry_bits){
+        int widths[6];
+        int fpsn;
+        int fpsd;
+        int mult;
+        int fullw;
+        int padw;
+        int i;
+        fpsn=_dec->state.info.fps_numerator;
+        fpsd=_dec->state.info.fps_denominator;
+        mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
+        /*The number of bytes that corresponds to a full-width bar.*/
+        fullw=250*h*fpsd*mult/fpsn;
+        /*The integer division above can truncate to zero (e.g., a small frame
+           with a very large frame rate numerator); never divide by that.*/
+        if(fullw<1)fullw=1;
+        padw=w-24;
+        /*Header and coded block bits.
+          Discard any counter that is negative or larger than the frame
+           total.*/
+        if(_dec->telemetry_frame_bytes<0||
+         _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
+          _dec->telemetry_frame_bytes=0;
+        }
+        if(_dec->telemetry_coding_bytes<0||
+         _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_coding_bytes=0;
+        }
+        if(_dec->telemetry_mode_bytes<0||
+         _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_mode_bytes=0;
+        }
+        if(_dec->telemetry_mv_bytes<0||
+         _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_mv_bytes=0;
+        }
+        if(_dec->telemetry_qi_bytes<0||
+         _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_qi_bytes=0;
+        }
+        if(_dec->telemetry_dc_bytes<0||
+         _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_dc_bytes=0;
+        }
+        /*Each bar shows the difference between two consecutive counters.*/
+        widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
+        widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
+        widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
+        widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
+        widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
+        widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
+        for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
+        /*A translucent black backdrop behind all six bars.*/
+        cairo_set_source_rgba(c,.0,.0,.0,.6);
+        cairo_rectangle(c,10,h-33,widths[0]+1,5);
+        cairo_rectangle(c,10,h-29,widths[1]+1,5);
+        cairo_rectangle(c,10,h-25,widths[2]+1,5);
+        cairo_rectangle(c,10,h-21,widths[3]+1,5);
+        cairo_rectangle(c,10,h-17,widths[4]+1,5);
+        cairo_rectangle(c,10,h-13,widths[5]+1,5);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,1,0,0);
+        cairo_rectangle(c,10.5,h-32.5,widths[0],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,0,1,0);
+        cairo_rectangle(c,10.5,h-28.5,widths[1],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,0,0,1);
+        cairo_rectangle(c,10.5,h-24.5,widths[2],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,.6,.4,.0);
+        cairo_rectangle(c,10.5,h-20.5,widths[3],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,.3,.3,.3);
+        cairo_rectangle(c,10.5,h-16.5,widths[4],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,.5,.5,.8);
+        cairo_rectangle(c,10.5,h-12.5,widths[5],4);
+        cairo_fill(c);
+      }
+      /*Master qi indicator[s]:*/
+      if(_dec->telemetry_qi&0x1){
+        cairo_text_extents_t extents;
+        char                 buffer[10];
+        int                  p;
+        int                  y;
+        p=0;
+        y=h-7.5;
+        /*Format up to three qi values as one- or two-digit decimal numbers
+           separated by spaces (48 is ASCII '0').*/
+        if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
+        buffer[p++]=48+_dec->state.qis[0]%10;
+        if(_dec->state.nqis>=2){
+          buffer[p++]=' ';
+          if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
+          buffer[p++]=48+_dec->state.qis[1]%10;
+        }
+        if(_dec->state.nqis==3){
+          buffer[p++]=' ';
+          if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
+          buffer[p++]=48+_dec->state.qis[2]%10;
+        }
+        buffer[p++]='\0';
+        cairo_select_font_face(c,"sans",
+         CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
+        cairo_set_font_size(c,18);
+        cairo_text_extents(c,buffer,&extents);
+        /*White text, right-aligned, with a thin black outline.*/
+        cairo_set_source_rgb(c,1,1,1);
+        cairo_move_to(c,w-extents.x_advance-10,y);
+        cairo_show_text(c,buffer);
+        cairo_set_source_rgb(c,0,0,0);
+        cairo_move_to(c,w-extents.x_advance-10,y);
+        cairo_text_path(c,buffer);
+        cairo_set_line_width(c,.8);
+        cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
+        cairo_stroke(c);
+      }
+      cairo_destroy(c);
+    }
+    /*Out of the Cairo plane into the telemetry YUV buffer.*/
+    _ycbcr[0].data=_dec->telemetry_frame_data;
+    _ycbcr[0].stride=_ycbcr[0].width;
+    _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
+    _ycbcr[1].stride=_ycbcr[1].width;
+    _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
+    _ycbcr[2].stride=_ycbcr[2].width;
+    y_row=_ycbcr[0].data;
+    u_row=_ycbcr[1].data;
+    v_row=_ycbcr[2].data;
+    rgb_row=data;
+    /*This is one of the few places it's worth handling chroma on a
+       case-by-case basis.*/
+    switch(_dec->state.info.pixel_fmt){
+      case TH_PF_420:{
+        /*Two rows and two columns at a time: four luma samples and one
+           chroma sample pair computed from the sum of the four pixels.*/
+        for(y=0;y<h;y+=2){
+          unsigned char *y_row2;
+          unsigned char *rgb_row2;
+          y_row2=y_row+_ycbcr[0].stride;
+          rgb_row2=rgb_row+cstride;
+          for(x=0;x<w;x+=2){
+            int y;
+            int u;
+            int v;
+            y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+             +24966*rgb_row[4*x+0]+4207500)/255000;
+            y_row[x]=OC_CLAMP255(y);
+            y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
+             +24966*rgb_row[4*x+4]+4207500)/255000;
+            y_row[x+1]=OC_CLAMP255(y);
+            y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
+             +24966*rgb_row2[4*x+0]+4207500)/255000;
+            y_row2[x]=OC_CLAMP255(y);
+            y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
+             +24966*rgb_row2[4*x+4]+4207500)/255000;
+            y_row2[x+1]=OC_CLAMP255(y);
+            u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
+             +rgb_row2[4*x+2]+rgb_row2[4*x+6])
+             -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
+             +rgb_row2[4*x+1]+rgb_row2[4*x+5])
+             +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
+             +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
+            v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
+             +rgb_row2[4*x+2]+rgb_row2[4*x+6])
+             -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
+              +rgb_row2[4*x+1]+rgb_row2[4*x+5])
+             -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
+              +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
+            u_row[x>>1]=OC_CLAMP255(u);
+            v_row[x>>1]=OC_CLAMP255(v);
+          }
+          y_row+=_ycbcr[0].stride<<1;
+          u_row+=_ycbcr[1].stride;
+          v_row+=_ycbcr[2].stride;
+          rgb_row+=cstride<<1;
+        }
+      }break;
+      case TH_PF_422:{
+        /*Two columns at a time: two luma samples and one chroma sample pair
+           computed from the sum of the two pixels.*/
+        for(y=0;y<h;y++){
+          for(x=0;x<w;x+=2){
+            int y;
+            int u;
+            int v;
+            y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+             +24966*rgb_row[4*x+0]+4207500)/255000;
+            y_row[x]=OC_CLAMP255(y);
+            y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
+             +24966*rgb_row[4*x+4]+4207500)/255000;
+            y_row[x+1]=OC_CLAMP255(y);
+            u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
+             -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
+             +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
+            v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
+             -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
+             -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
+            u_row[x>>1]=OC_CLAMP255(u);
+            v_row[x>>1]=OC_CLAMP255(v);
+          }
+          y_row+=_ycbcr[0].stride;
+          u_row+=_ycbcr[1].stride;
+          v_row+=_ycbcr[2].stride;
+          rgb_row+=cstride;
+        }
+      }break;
+      /*case TH_PF_444:*/
+      default:{
+        /*One luma sample and one chroma sample pair per pixel.*/
+        for(y=0;y<h;y++){
+          for(x=0;x<w;x++){
+            int y;
+            int u;
+            int v;
+            y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+             +24966*rgb_row[4*x+0]+4207500)/255000;
+            u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
+             +99232*rgb_row[4*x+0]+29032005)/225930;
+            v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
+             -25536*rgb_row[4*x+0]+45940035)/357510;
+            y_row[x]=OC_CLAMP255(y);
+            u_row[x]=OC_CLAMP255(u);
+            v_row[x]=OC_CLAMP255(v);
+          }
+          y_row+=_ycbcr[0].stride;
+          u_row+=_ycbcr[1].stride;
+          v_row+=_ycbcr[2].stride;
+          rgb_row+=cstride;
+        }
+      }break;
+    }
+    /*Finished.
+      Destroy the surface.*/
+    cairo_surface_destroy(cs);
+  }
+#endif
+  return 0;
+}

Copied: trunk/theora/lib/dequant.c (from rev 16442, trunk/theora/lib/dec/dequant.c)
===================================================================
--- trunk/theora/lib/dequant.c	                        (rev 0)
+++ trunk/theora/lib/dequant.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,182 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "dequant.h"
+#include "decint.h"
+
+/*Reads a set of quantization parameters from a bit packer into _qinfo.
+  In order, this reads the 64 loop filter limits, the 64 AC scale values, the
+   64 DC scale values, the list of base matrices, and then, for each of the
+   2*3 (qti,pli) combinations, either a reference to an earlier combination's
+   ranges or a new list of quantizer ranges and base matrix indices.
+  _opb:   The bit packer to read from.
+  _qinfo: The quantization info structure to fill in.
+  Return: 0 on success, TH_EFAULT if an allocation fails, or TH_EBADHEADER if
+           the ranges overrun the set of quantizer indices or refer to a base
+           matrix that does not exist.
+          On failure the caller is responsible for cleaning up any partially
+           constructed _qinfo.*/
+int oc_quant_params_unpack(oc_pack_buf *_opb,th_quant_info *_qinfo){
+  th_quant_base *mats;
+  long           v;
+  int            nmats;
+  int            run_sizes[64];
+  int            mat_idx[64];
+  int            width;
+  int            qti;
+  int            pli;
+  int            k;
+  /*Loop filter limits: a 3-bit field width followed by 64 values.*/
+  v=oc_pack_read(_opb,3);
+  width=(int)v;
+  for(k=0;k<64;k++){
+    v=oc_pack_read(_opb,width);
+    _qinfo->loop_filter_limits[k]=(unsigned char)v;
+  }
+  /*AC scale values: a 4-bit field width (minus one) followed by 64 values.*/
+  v=oc_pack_read(_opb,4);
+  width=(int)v+1;
+  for(k=0;k<64;k++){
+    v=oc_pack_read(_opb,width);
+    _qinfo->ac_scale[k]=(ogg_uint16_t)v;
+  }
+  /*DC scale values, coded the same way.*/
+  v=oc_pack_read(_opb,4);
+  width=(int)v+1;
+  for(k=0;k<64;k++){
+    v=oc_pack_read(_opb,width);
+    _qinfo->dc_scale[k]=(ogg_uint16_t)v;
+  }
+  /*The base matrices: a 9-bit count (minus one), then 64 bytes apiece.
+    These live in a temporary array and are copied into each range list that
+     refers to them.*/
+  v=oc_pack_read(_opb,9);
+  nmats=(int)v+1;
+  mats=_ogg_malloc(nmats*sizeof(mats[0]));
+  if(mats==NULL)return TH_EFAULT;
+  for(k=0;k<nmats;k++){
+    int ci;
+    for(ci=0;ci<64;ci++){
+      v=oc_pack_read(_opb,8);
+      mats[k][ci]=(unsigned char)v;
+    }
+  }
+  /*From here on, width is the size of a base matrix index.*/
+  width=oc_ilog(nmats-1);
+  for(qti=0;qti<2;qti++){
+    for(pli=0;pli<3;pli++){
+      th_quant_ranges *ranges;
+      th_quant_base   *range_mats;
+      int             *range_sizes;
+      int              nranges;
+      int              qi;
+      int              ri;
+      ranges=_qinfo->qi_ranges[qti]+pli;
+      /*Every combination but the very first starts with a flag that says
+         whether or not a new set of ranges is coded.*/
+      if(qti>0||pli>0){
+        v=oc_pack_read1(_opb);
+        if(!v){
+          int src_qti;
+          int src_pli;
+          /*By default, copy the combination immediately before this one.*/
+          src_qti=(qti*3+pli-1)/3;
+          src_pli=(qti*3+pli-1)%3;
+          /*When qti>0, one more flag can select the same plane of the
+             previous qti instead.*/
+          if(qti>0){
+            v=oc_pack_read1(_opb);
+            if(v){
+              src_qti=qti-1;
+              src_pli=pli;
+            }
+          }
+          /*This is a shallow copy: the pointers are shared with the
+             source.*/
+          *ranges=_qinfo->qi_ranges[src_qti][src_pli];
+          continue;
+        }
+      }
+      /*A new set: a base matrix index, then (range size, base matrix index)
+         pairs until the ranges cover all 63 steps between quantizer
+         indices.*/
+      v=oc_pack_read(_opb,width);
+      mat_idx[0]=(int)v;
+      nranges=0;
+      qi=0;
+      while(qi<63){
+        v=oc_pack_read(_opb,oc_ilog(62-qi));
+        run_sizes[nranges]=(int)v+1;
+        qi+=(int)v+1;
+        v=oc_pack_read(_opb,width);
+        nranges++;
+        mat_idx[nranges]=(int)v;
+      }
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      if(qi>63){
+        _ogg_free(mats);
+        return TH_EBADHEADER;
+      }
+      ranges->nranges=nranges;
+      range_sizes=(int *)_ogg_malloc(nranges*sizeof(range_sizes[0]));
+      ranges->sizes=range_sizes;
+      if(range_sizes==NULL){
+        /*Note: The caller is responsible for cleaning up any partially
+           constructed qinfo.*/
+        _ogg_free(mats);
+        return TH_EFAULT;
+      }
+      memcpy(range_sizes,run_sizes,nranges*sizeof(range_sizes[0]));
+      /*There is one more base matrix than there are ranges.*/
+      range_mats=(th_quant_base *)_ogg_malloc((nranges+1)*sizeof(range_mats[0]));
+      if(range_mats==NULL){
+        /*Note: The caller is responsible for cleaning up any partially
+           constructed qinfo.*/
+        _ogg_free(mats);
+        return TH_EFAULT;
+      }
+      ranges->base_matrices=(const th_quant_base *)range_mats;
+      /*Copy the referenced base matrices, last to first, validating each
+         index along the way.*/
+      for(ri=nranges;ri>=0;ri--){
+        int bmi;
+        bmi=mat_idx[ri];
+        /*Note: The caller is responsible for cleaning up any partially
+           constructed qinfo.*/
+        if(bmi>=nmats){
+          _ogg_free(mats);
+          return TH_EBADHEADER;
+        }
+        memcpy(range_mats[ri],mats[bmi],sizeof(range_mats[ri]));
+      }
+    }
+  }
+  _ogg_free(mats);
+  return 0;
+}
+
+/*Frees the range size and base matrix storage referenced by _qinfo.
+  Several of the six (qti,pli) entries may share the same storage, so the
+   entries are visited from last to first, and an entry's pointers are cleared
+   instead of freed when they match those of the entry just before it or those
+   of the same plane in qi_ranges[0].
+  _qinfo: The quantization info structure to clear.*/
+void oc_quant_params_clear(th_quant_info *_qinfo){
+  int i;
+  for(i=5;i>=0;i--){
+    th_quant_ranges *cur;
+    cur=_qinfo->qi_ranges[i/3]+i%3;
+    /*Clear any duplicate pointer references.*/
+    if(i>0){
+      const th_quant_ranges *prev;
+      prev=_qinfo->qi_ranges[(i-1)/3]+(i-1)%3;
+      if(cur->sizes==prev->sizes)cur->sizes=NULL;
+      if(cur->base_matrices==prev->base_matrices)cur->base_matrices=NULL;
+    }
+    if(i>=3){
+      const th_quant_ranges *other;
+      other=_qinfo->qi_ranges[0]+i%3;
+      if(cur->sizes==other->sizes)cur->sizes=NULL;
+      if(cur->base_matrices==other->base_matrices)cur->base_matrices=NULL;
+    }
+    /*Now free all the non-duplicate storage.*/
+    _ogg_free((void *)cur->sizes);
+    _ogg_free((void *)cur->base_matrices);
+  }
+}

Copied: trunk/theora/lib/dequant.h (from rev 16442, trunk/theora/lib/dec/dequant.h)
===================================================================
--- trunk/theora/lib/dequant.h	                        (rev 0)
+++ trunk/theora/lib/dequant.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,27 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#ifndef _dequant_H
+# define _dequant_H (1)
+# include "quant.h"
+# include "bitpack.h"
+
+/*Reads a set of quantization parameters out of _opb and into _qinfo.
+  Return: 0 on success, TH_EFAULT if memory could not be allocated, or
+           TH_EBADHEADER if the packet describes invalid quantizer ranges.
+          On failure, the caller is responsible for cleaning up any partially
+           constructed _qinfo.*/
+int oc_quant_params_unpack(oc_pack_buf *_opb,th_quant_info *_qinfo);
+
+/*Frees the range size and base matrix storage referenced by _qinfo, freeing
+   storage that is shared between several entries only once.*/
+void oc_quant_params_clear(th_quant_info *_qinfo);
+
+#endif

Copied: trunk/theora/lib/encapiwrapper.c (from rev 16442, trunk/theora/lib/enc/encapiwrapper.c)
===================================================================
--- trunk/theora/lib/encapiwrapper.c	                        (rev 0)
+++ trunk/theora/lib/encapiwrapper.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,168 @@
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+#include "encint.h"
+#include "theora/theoraenc.h"
+
+
+
+static void th_enc_api_clear(th_api_wrapper *_api){
+  if(_api->encode)th_encode_free(_api->encode);
+  memset(_api,0,sizeof(*_api));
+}
+
+static void theora_encode_clear(theora_state *_te){
+  if(_te->i!=NULL)theora_info_clear(_te->i);
+  memset(_te,0,sizeof(*_te));
+}
+
+static int theora_encode_control(theora_state *_te,int _req,
+ void *_buf,size_t _buf_sz){
+  return th_encode_ctl(((th_api_wrapper *)_te->i->codec_setup)->encode,
+   _req,_buf,_buf_sz);
+}
+
+static ogg_int64_t theora_encode_granule_frame(theora_state *_te,
+ ogg_int64_t _gp){
+  return th_granule_frame(((th_api_wrapper *)_te->i->codec_setup)->encode,_gp);
+}
+
+static double theora_encode_granule_time(theora_state *_te,ogg_int64_t _gp){
+  return th_granule_time(((th_api_wrapper *)_te->i->codec_setup)->encode,_gp);
+}
+
+static const oc_state_dispatch_vtable OC_ENC_DISPATCH_VTBL={
+  (oc_state_clear_func)theora_encode_clear,
+  (oc_state_control_func)theora_encode_control,
+  (oc_state_granule_frame_func)theora_encode_granule_frame,
+  (oc_state_granule_time_func)theora_encode_granule_time,
+};
+
+int theora_encode_init(theora_state *_te,theora_info *_ci){
+  th_api_info *apiinfo;
+  th_info      info;
+  ogg_uint32_t keyframe_frequency_force;
+  /*Allocate our own combined API wrapper/theora_info struct.
+    We put them both in one malloc'd block so that when the API wrapper is
+     freed, the info struct goes with it.
+    This avoids having to figure out whether or not we need to free the info
+     struct in either theora_info_clear() or theora_clear().*/
+  apiinfo=(th_api_info *)_ogg_malloc(sizeof(*apiinfo));
+  if(apiinfo==NULL)return TH_EFAULT;
+  /*Make our own copy of the info struct, since its lifetime should be
+     independent of the one we were passed in.*/
+  *&apiinfo->info=*_ci;
+  oc_theora_info2th_info(&info,_ci);
+  apiinfo->api.encode=th_encode_alloc(&info);
+  if(apiinfo->api.encode==NULL){
+    _ogg_free(apiinfo);
+    return OC_EINVAL;
+  }
+  apiinfo->api.clear=(oc_setup_clear_func)th_enc_api_clear;
+  /*Provide entry points for ABI compatibility with old decoder shared libs.*/
+  _te->internal_encode=(void *)&OC_ENC_DISPATCH_VTBL;
+  _te->internal_decode=NULL;
+  _te->granulepos=0;
+  _te->i=&apiinfo->info;
+  _te->i->codec_setup=&apiinfo->api;
+  /*Set the precise requested keyframe frequency.*/
+  keyframe_frequency_force=_ci->keyframe_auto_p?
+   _ci->keyframe_frequency_force:_ci->keyframe_frequency;
+  th_encode_ctl(apiinfo->api.encode,
+   TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,
+   &keyframe_frequency_force,sizeof(keyframe_frequency_force));
+  /*TODO: Additional codec setup using the extra fields in theora_info.*/
+  return 0;
+}
+
+int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){
+  th_api_wrapper  *api;
+  th_ycbcr_buffer  buf;
+  int              ret;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  buf[0].width=_yuv->y_width;
+  buf[0].height=_yuv->y_height;
+  buf[0].stride=_yuv->y_stride;
+  buf[0].data=_yuv->y;
+  buf[1].width=_yuv->uv_width;
+  buf[1].height=_yuv->uv_height;
+  buf[1].stride=_yuv->uv_stride;
+  buf[1].data=_yuv->u;
+  buf[2].width=_yuv->uv_width;
+  buf[2].height=_yuv->uv_height;
+  buf[2].stride=_yuv->uv_stride;
+  buf[2].data=_yuv->v;
+  ret=th_encode_ycbcr_in(api->encode,buf);
+  if(ret<0)return ret;
+  _te->granulepos=api->encode->state.granpos;
+  return ret;
+}
+
+int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  return th_encode_packetout(api->encode,_last_p,_op);
+}
+
+int theora_encode_header(theora_state *_te,ogg_packet *_op){
+  oc_enc_ctx     *enc;
+  th_api_wrapper *api;
+  int             ret;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  enc=api->encode;
+  /*If we've already started encoding, fail.*/
+  if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){
+    return TH_EINVAL;
+  }
+  /*Reset the state to make sure we output an info packet.*/
+  enc->packet_state=OC_PACKET_INFO_HDR;
+  ret=th_encode_flushheader(api->encode,NULL,_op);
+  return ret>=0?0:ret;
+}
+
+int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){
+  oggpack_buffer  opb;
+  void           *buf;
+  int             packet_state;
+  int             ret;
+  packet_state=OC_PACKET_COMMENT_HDR;
+  oggpackB_writeinit(&opb);
+  ret=oc_state_flushheader(NULL,&packet_state,&opb,NULL,NULL,
+   th_version_string(),(th_comment *)_tc,_op);
+  if(ret>=0){
+    /*The oggpack_buffer's lifetime ends with this function, so we have to
+       copy out the packet contents.
+      Presumably the application knows it is supposed to free this.
+      This part works nothing like the Vorbis API, and the documentation on it
+       has been wrong for some time, claiming libtheora owned the memory.*/
+    buf=_ogg_malloc(_op->bytes);
+    if(buf==NULL){
+      _op->packet=NULL;
+      ret=TH_EFAULT;
+    }
+    else{
+      memcpy(buf,_op->packet,_op->bytes);
+      _op->packet=buf;
+      ret=0;
+    }
+  }
+  oggpack_writeclear(&opb);
+  return ret;
+}
+
+int theora_encode_tables(theora_state *_te,ogg_packet *_op){
+  oc_enc_ctx     *enc;
+  th_api_wrapper *api;
+  int             ret;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  enc=api->encode;
+  /*If we've already started encoding, fail.*/
+  if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){
+    return TH_EINVAL;
+  }
+  /*Reset the state to make sure we output a setup packet.*/
+  enc->packet_state=OC_PACKET_SETUP_HDR;
+  ret=th_encode_flushheader(api->encode,NULL,_op);
+  return ret>=0?0:ret;
+}

Copied: trunk/theora/lib/encfrag.c (from rev 16442, trunk/theora/lib/enc/encfrag.c)
===================================================================
--- trunk/theora/lib/encfrag.c	                        (rev 0)
+++ trunk/theora/lib/encfrag.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,388 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+  (*_enc->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride);
+}
+
+void oc_enc_frag_sub_c(ogg_int16_t _diff[64],const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  int i;
+  for(i=0;i<8;i++){
+    int j;
+    for(j=0;j<8;j++)_diff[i*8+j]=(ogg_int16_t)(_src[j]-_ref[j]);
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+}
+
+void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride){
+  (*_enc->opt_vtable.frag_sub_128)(_diff,_src,_ystride);
+}
+
+void oc_enc_frag_sub_128_c(ogg_int16_t *_diff,
+ const unsigned char *_src,int _ystride){
+  int i;
+  for(i=0;i<8;i++){
+    int j;
+    for(j=0;j<8;j++)_diff[i*8+j]=(ogg_int16_t)(_src[j]-128);
+    _src+=_ystride;
+  }
+}
+
+unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_x,
+ const unsigned char *_y,int _ystride){
+  return (*_enc->opt_vtable.frag_sad)(_x,_y,_ystride);
+}
+
+unsigned oc_enc_frag_sad_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  unsigned sad;
+  int      i;
+  sad=0;
+  for(i=8;i-->0;){
+    int j;
+    for(j=0;j<8;j++)sad+=abs(_src[j]-_ref[j]);
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+  return sad;
+}
+
+unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh){
+  return (*_enc->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh);
+}
+
+unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+  unsigned sad;
+  int      i;
+  sad=0;
+  for(i=8;i-->0;){
+    int j;
+    for(j=0;j<8;j++)sad+=abs(_src[j]-_ref[j]);
+    if(sad>_thresh)break;
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+  return sad;
+}
+
+unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh){
+  return (*_enc->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride,
+   _thresh);
+}
+
+unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){
+  unsigned sad;
+  int      i;
+  sad=0;
+  for(i=8;i-->0;){
+    int j;
+    for(j=0;j<8;j++)sad+=abs(_src[j]-(_ref1[j]+_ref2[j]>>1));
+    if(sad>_thresh)break;
+    _src+=_ystride;
+    _ref1+=_ystride;
+    _ref2+=_ystride;
+  }
+  return sad;
+}
+
+static void oc_diff_hadamard(ogg_int16_t _buf[64],const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  int i;
+  for(i=0;i<8;i++){
+    int t0;
+    int t1;
+    int t2;
+    int t3;
+    int t4;
+    int t5;
+    int t6;
+    int t7;
+    int r;
+    /*Hadamard stage 1:*/
+    t0=_src[0]-_ref[0]+_src[4]-_ref[4];
+    t4=_src[0]-_ref[0]-_src[4]+_ref[4];
+    t1=_src[1]-_ref[1]+_src[5]-_ref[5];
+    t5=_src[1]-_ref[1]-_src[5]+_ref[5];
+    t2=_src[2]-_ref[2]+_src[6]-_ref[6];
+    t6=_src[2]-_ref[2]-_src[6]+_ref[6];
+    t3=_src[3]-_ref[3]+_src[7]-_ref[7];
+    t7=_src[3]-_ref[3]-_src[7]+_ref[7];
+    /*Hadamard stage 2:*/
+    r=t0;
+    t0+=t2;
+    t2=r-t2;
+    r=t1;
+    t1+=t3;
+    t3=r-t3;
+    r=t4;
+    t4+=t6;
+    t6=r-t6;
+    r=t5;
+    t5+=t7;
+    t7=r-t7;
+    /*Hadamard stage 3:*/
+    _buf[0*8+i]=(ogg_int16_t)(t0+t1);
+    _buf[1*8+i]=(ogg_int16_t)(t0-t1);
+    _buf[2*8+i]=(ogg_int16_t)(t2+t3);
+    _buf[3*8+i]=(ogg_int16_t)(t2-t3);
+    _buf[4*8+i]=(ogg_int16_t)(t4+t5);
+    _buf[5*8+i]=(ogg_int16_t)(t4-t5);
+    _buf[6*8+i]=(ogg_int16_t)(t6+t7);
+    _buf[7*8+i]=(ogg_int16_t)(t6-t7);
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+}
+
+static void oc_diff_hadamard2(ogg_int16_t _buf[64],const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
+  int i;
+  for(i=0;i<8;i++){
+    int t0;
+    int t1;
+    int t2;
+    int t3;
+    int t4;
+    int t5;
+    int t6;
+    int t7;
+    int r;
+    /*Hadamard stage 1:*/
+    r=_ref1[0]+_ref2[0]>>1;
+    t4=_ref1[4]+_ref2[4]>>1;
+    t0=_src[0]-r+_src[4]-t4;
+    t4=_src[0]-r-_src[4]+t4;
+    r=_ref1[1]+_ref2[1]>>1;
+    t5=_ref1[5]+_ref2[5]>>1;
+    t1=_src[1]-r+_src[5]-t5;
+    t5=_src[1]-r-_src[5]+t5;
+    r=_ref1[2]+_ref2[2]>>1;
+    t6=_ref1[6]+_ref2[6]>>1;
+    t2=_src[2]-r+_src[6]-t6;
+    t6=_src[2]-r-_src[6]+t6;
+    r=_ref1[3]+_ref2[3]>>1;
+    t7=_ref1[7]+_ref2[7]>>1;
+    t3=_src[3]-r+_src[7]-t7;
+    t7=_src[3]-r-_src[7]+t7;
+    /*Hadamard stage 2:*/
+    r=t0;
+    t0+=t2;
+    t2=r-t2;
+    r=t1;
+    t1+=t3;
+    t3=r-t3;
+    r=t4;
+    t4+=t6;
+    t6=r-t6;
+    r=t5;
+    t5+=t7;
+    t7=r-t7;
+    /*Hadamard stage 3:*/
+    _buf[0*8+i]=(ogg_int16_t)(t0+t1);
+    _buf[1*8+i]=(ogg_int16_t)(t0-t1);
+    _buf[2*8+i]=(ogg_int16_t)(t2+t3);
+    _buf[3*8+i]=(ogg_int16_t)(t2-t3);
+    _buf[4*8+i]=(ogg_int16_t)(t4+t5);
+    _buf[5*8+i]=(ogg_int16_t)(t4-t5);
+    _buf[6*8+i]=(ogg_int16_t)(t6+t7);
+    _buf[7*8+i]=(ogg_int16_t)(t6-t7);
+    _src+=_ystride;
+    _ref1+=_ystride;
+    _ref2+=_ystride;
+  }
+}
+
+static void oc_intra_hadamard(ogg_int16_t _buf[64],const unsigned char *_src,
+ int _ystride){
+  int i;
+  for(i=0;i<8;i++){
+    int t0;
+    int t1;
+    int t2;
+    int t3;
+    int t4;
+    int t5;
+    int t6;
+    int t7;
+    int r;
+    /*Hadamard stage 1:*/
+    t0=_src[0]+_src[4];
+    t4=_src[0]-_src[4];
+    t1=_src[1]+_src[5];
+    t5=_src[1]-_src[5];
+    t2=_src[2]+_src[6];
+    t6=_src[2]-_src[6];
+    t3=_src[3]+_src[7];
+    t7=_src[3]-_src[7];
+    /*Hadamard stage 2:*/
+    r=t0;
+    t0+=t2;
+    t2=r-t2;
+    r=t1;
+    t1+=t3;
+    t3=r-t3;
+    r=t4;
+    t4+=t6;
+    t6=r-t6;
+    r=t5;
+    t5+=t7;
+    t7=r-t7;
+    /*Hadamard stage 3:*/
+    _buf[0*8+i]=(ogg_int16_t)(t0+t1);
+    _buf[1*8+i]=(ogg_int16_t)(t0-t1);
+    _buf[2*8+i]=(ogg_int16_t)(t2+t3);
+    _buf[3*8+i]=(ogg_int16_t)(t2-t3);
+    _buf[4*8+i]=(ogg_int16_t)(t4+t5);
+    _buf[5*8+i]=(ogg_int16_t)(t4-t5);
+    _buf[6*8+i]=(ogg_int16_t)(t6+t7);
+    _buf[7*8+i]=(ogg_int16_t)(t6-t7);
+    _src+=_ystride;
+  }
+}
+
+unsigned oc_hadamard_sad_thresh(const ogg_int16_t _buf[64],unsigned _thresh){
+  unsigned    sad;
+  int         t0;
+  int         t1;
+  int         t2;
+  int         t3;
+  int         t4;
+  int         t5;
+  int         t6;
+  int         t7;
+  int         r;
+  int         i;
+  sad=0;
+  for(i=0;i<8;i++){
+    /*Hadamard stage 1:*/
+    t0=_buf[i*8+0]+_buf[i*8+4];
+    t4=_buf[i*8+0]-_buf[i*8+4];
+    t1=_buf[i*8+1]+_buf[i*8+5];
+    t5=_buf[i*8+1]-_buf[i*8+5];
+    t2=_buf[i*8+2]+_buf[i*8+6];
+    t6=_buf[i*8+2]-_buf[i*8+6];
+    t3=_buf[i*8+3]+_buf[i*8+7];
+    t7=_buf[i*8+3]-_buf[i*8+7];
+    /*Hadamard stage 2:*/
+    r=t0;
+    t0+=t2;
+    t2=r-t2;
+    r=t1;
+    t1+=t3;
+    t3=r-t3;
+    r=t4;
+    t4+=t6;
+    t6=r-t6;
+    r=t5;
+    t5+=t7;
+    t7=r-t7;
+    /*Hadamard stage 3:*/
+    r=abs(t0+t1);
+    r+=abs(t0-t1);
+    r+=abs(t2+t3);
+    r+=abs(t2-t3);
+    r+=abs(t4+t5);
+    r+=abs(t4-t5);
+    r+=abs(t6+t7);
+    r+=abs(t6-t7);
+    sad+=r;
+    if(sad>_thresh)break;
+  }
+  return sad;
+}
+
+unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh){
+  return (*_enc->opt_vtable.frag_satd_thresh)(_src,_ref,_ystride,_thresh);
+}
+
+unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+  ogg_int16_t buf[64];
+  oc_diff_hadamard(buf,_src,_ref,_ystride);
+  return oc_hadamard_sad_thresh(buf,_thresh);
+}
+
+unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh){
+  return (*_enc->opt_vtable.frag_satd2_thresh)(_src,_ref1,_ref2,_ystride,
+   _thresh);
+}
+
+unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){
+  ogg_int16_t buf[64];
+  oc_diff_hadamard2(buf,_src,_ref1,_ref2,_ystride);
+  return oc_hadamard_sad_thresh(buf,_thresh);
+}
+
+unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
+ const unsigned char *_src,int _ystride){
+  return (*_enc->opt_vtable.frag_intra_satd)(_src,_ystride);
+}
+
+unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride){
+  ogg_int16_t buf[64];
+  oc_intra_hadamard(buf,_src,_ystride);
+  return oc_hadamard_sad_thresh(buf,UINT_MAX)
+   -abs(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
+}
+
+void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride){
+  (*_enc->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride);
+}
+
+void oc_enc_frag_copy2_c(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride){
+  int i;
+  int j;
+  for(i=8;i-->0;){
+    for(j=0;j<8;j++)_dst[j]=_src1[j]+_src2[j]>>1;
+    _dst+=_ystride;
+    _src1+=_ystride;
+    _src2+=_ystride;
+  }
+}
+
+void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc,
+ unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]){
+  (*_enc->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue);
+}
+
+void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
+  (*_enc->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue);
+}

Copied: trunk/theora/lib/encinfo.c (from rev 16442, trunk/theora/lib/enc/encinfo.c)
===================================================================
--- trunk/theora/lib/encinfo.c	                        (rev 0)
+++ trunk/theora/lib/encinfo.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,121 @@
+#include <stdlib.h>
+#include <string.h>
+#include "internal.h"
+#include "enquant.h"
+#include "huffenc.h"
+
+
+
+/*Packs a series of octets from a given byte array into the pack buffer.
+  _opb: The pack buffer to store the octets in.
+  _buf: The byte array containing the bytes to pack.
+  _len: The number of octets to pack.*/
+static void oc_pack_octets(oggpack_buffer *_opb,const char *_buf,int _len){
+  int i;
+  for(i=0;i<_len;i++)oggpackB_write(_opb,_buf[i],8);
+}
+
+
+
+int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
+ oggpack_buffer *_opb,const th_quant_info *_qinfo,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS],
+ const char *_vendor,th_comment *_tc,ogg_packet *_op){
+  unsigned char *packet;
+  int            b_o_s;
+  if(_op==NULL)return TH_EFAULT;
+  switch(*_packet_state){
+    /*Codec info header.*/
+    case OC_PACKET_INFO_HDR:{
+      if(_state==NULL)return TH_EFAULT;
+      oggpackB_reset(_opb);
+      /*Mark this packet as the info header.*/
+      oggpackB_write(_opb,0x80,8);
+      /*Write the codec string.*/
+      oc_pack_octets(_opb,"theora",6);
+      /*Write the codec bitstream version.*/
+      oggpackB_write(_opb,TH_VERSION_MAJOR,8);
+      oggpackB_write(_opb,TH_VERSION_MINOR,8);
+      oggpackB_write(_opb,TH_VERSION_SUB,8);
+      /*Describe the encoded frame.*/
+      oggpackB_write(_opb,_state->info.frame_width>>4,16);
+      oggpackB_write(_opb,_state->info.frame_height>>4,16);
+      oggpackB_write(_opb,_state->info.pic_width,24);
+      oggpackB_write(_opb,_state->info.pic_height,24);
+      oggpackB_write(_opb,_state->info.pic_x,8);
+      oggpackB_write(_opb,_state->info.pic_y,8);
+      oggpackB_write(_opb,_state->info.fps_numerator,32);
+      oggpackB_write(_opb,_state->info.fps_denominator,32);
+      oggpackB_write(_opb,_state->info.aspect_numerator,24);
+      oggpackB_write(_opb,_state->info.aspect_denominator,24);
+      oggpackB_write(_opb,_state->info.colorspace,8);
+      oggpackB_write(_opb,_state->info.target_bitrate,24);
+      oggpackB_write(_opb,_state->info.quality,6);
+      oggpackB_write(_opb,_state->info.keyframe_granule_shift,5);
+      oggpackB_write(_opb,_state->info.pixel_fmt,2);
+      /*Spare configuration bits.*/
+      oggpackB_write(_opb,0,3);
+      b_o_s=1;
+    }break;
+    /*Comment header.*/
+    case OC_PACKET_COMMENT_HDR:{
+      int vendor_len;
+      int i;
+      if(_tc==NULL)return TH_EFAULT;
+      vendor_len=strlen(_vendor);
+      oggpackB_reset(_opb);
+      /*Mark this packet as the comment header.*/
+      oggpackB_write(_opb,0x81,8);
+      /*Write the codec string.*/
+      oc_pack_octets(_opb,"theora",6);
+      /*Write the vendor string.*/
+      oggpack_write(_opb,vendor_len,32);
+      oc_pack_octets(_opb,_vendor,vendor_len);
+      oggpack_write(_opb,_tc->comments,32);
+      for(i=0;i<_tc->comments;i++){
+        if(_tc->user_comments[i]!=NULL){
+          oggpack_write(_opb,_tc->comment_lengths[i],32);
+          oc_pack_octets(_opb,_tc->user_comments[i],_tc->comment_lengths[i]);
+        }
+        else oggpack_write(_opb,0,32);
+      }
+      b_o_s=0;
+    }break;
+    /*Codec setup header.*/
+    case OC_PACKET_SETUP_HDR:{
+      int ret;
+      oggpackB_reset(_opb);
+      /*Mark this packet as the setup header.*/
+      oggpackB_write(_opb,0x82,8);
+      /*Write the codec string.*/
+      oc_pack_octets(_opb,"theora",6);
+      /*Write the quantizer tables.*/
+      oc_quant_params_pack(_opb,_qinfo);
+      /*Write the huffman codes.*/
+      ret=oc_huff_codes_pack(_opb,_codes);
+      /*This should never happen, because we validate the tables when they
+         are set.
+        If you see this, there's a good chance memory is being corrupted.*/
+      if(ret<0)return ret;
+      b_o_s=0;
+    }break;
+    /*No more headers to emit.*/
+    default:return 0;
+  }
+  /*This is kind of fugly: we hand the user a buffer which they do not own.
+    We will overwrite it when the next packet is output, so the user better be
+     done with it by then.
+    Vorbis is little better: it hands back buffers that it will free the next
+     time the headers are requested, or when the encoder is cleared.
+    Hopefully libogg2 will make this much cleaner.*/
+  packet=oggpackB_get_buffer(_opb);
+  /*If there's no packet, malloc failed while writing.*/
+  if(packet==NULL)return TH_EFAULT;
+  _op->packet=packet;
+  _op->bytes=oggpackB_bytes(_opb);
+  _op->b_o_s=b_o_s;
+  _op->e_o_s=0;
+  _op->granulepos=0;
+  _op->packetno=*_packet_state+3;
+  return ++(*_packet_state)+3;
+}

Copied: trunk/theora/lib/encint.h (from rev 16442, trunk/theora/lib/enc/encint.h)
===================================================================
--- trunk/theora/lib/encint.h	                        (rev 0)
+++ trunk/theora/lib/encint.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,489 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#if !defined(_encint_H)
+# define _encint_H (1)
+# if defined(HAVE_CONFIG_H)
+#  include "config.h"
+# endif
+# include "theora/theoraenc.h"
+# include "internal.h"
+# include "ocintrin.h"
+# include "mathops.h"
+# include "enquant.h"
+# include "huffenc.h"
+/*# define OC_COLLECT_METRICS*/
+
+
+
+typedef oc_mv                         oc_mv2[2];
+
+typedef struct oc_enc_opt_vtable      oc_enc_opt_vtable;
+typedef struct oc_mb_enc_info         oc_mb_enc_info;
+typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
+typedef struct oc_iir_filter          oc_iir_filter;
+typedef struct oc_log_linear_fit      oc_log_linear_fit;
+typedef struct oc_frame_metrics       oc_frame_metrics;
+typedef struct oc_rc_state            oc_rc_state;
+typedef struct th_enc_ctx             oc_enc_ctx;
+typedef struct oc_token_checkpoint    oc_token_checkpoint;
+
+
+
+/*Constants for the packet-out state machine specific to the encoder.*/
+
+/*Next packet to emit: Data packet, but none are ready yet.*/
+#define OC_PACKET_EMPTY (0)
+/*Next packet to emit: Data packet, and one is ready.*/
+#define OC_PACKET_READY (1)
+
+
+
+/*The bits used for each of the MB mode codebooks.*/
+extern const unsigned char OC_MODE_BITS[2][OC_NMODES];
+
+/*The bits used for each of the MV codebooks.*/
+extern const unsigned char OC_MV_BITS[2][64];
+
+/*The minimum value that can be stored in a SB run for each codeword.
+  The last entry is the upper bound on the length of a single SB run.*/
+extern const ogg_uint16_t  OC_SB_RUN_VAL_MIN[8];
+/*The bits used for each SB run codeword.*/
+extern const unsigned char OC_SB_RUN_CODE_NBITS[7];
+
+/*The bits used for each block run length (starting with 1).*/
+extern const unsigned char OC_BLOCK_RUN_CODE_NBITS[30];
+
+
+
+/*Encoder specific functions with accelerated variants.*/
+struct oc_enc_opt_vtable{
+  unsigned (*frag_sad)(const unsigned char *_src,
+   const unsigned char *_ref,int _ystride);
+  unsigned (*frag_sad_thresh)(const unsigned char *_src,
+   const unsigned char *_ref,int _ystride,unsigned _thresh);
+  unsigned (*frag_sad2_thresh)(const unsigned char *_src,
+   const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+   unsigned _thresh);
+  unsigned (*frag_satd_thresh)(const unsigned char *_src,
+   const unsigned char *_ref,int _ystride,unsigned _thresh);
+  unsigned (*frag_satd2_thresh)(const unsigned char *_src,
+   const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+   unsigned _thresh);
+  unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride);
+  void     (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
+   const unsigned char *_ref,int _ystride);
+  void     (*frag_sub_128)(ogg_int16_t _diff[64],
+   const unsigned char *_src,int _ystride);
+  void     (*frag_copy2)(unsigned char *_dst,
+   const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+  void     (*frag_recon_intra)(unsigned char *_dst,int _ystride,
+   const ogg_int16_t _residue[64]);
+  void     (*frag_recon_inter)(unsigned char *_dst,
+   const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+  void     (*fdct8x8)(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+};
+
+
+void oc_enc_vtable_init(oc_enc_ctx *_enc);
+
+
+
+/*Encoder-specific macroblock information.*/
+struct oc_mb_enc_info{
+  /*Neighboring macro blocks that have MVs available from the current frame.*/
+  unsigned      cneighbors[4];
+  /*Neighboring macro blocks to use for MVs from the previous frame.*/
+  unsigned      pneighbors[4];
+  /*The number of current-frame neighbors.*/
+  unsigned char ncneighbors;
+  /*The number of previous-frame neighbors.*/
+  unsigned char npneighbors;
+  /*Flags indicating which MB modes have been refined.*/
+  unsigned char refined;
+  /*Motion vectors for a macro block for the current frame and the
+     previous two frames.
+    Each is a set of 2 vectors against OC_FRAME_GOLD and OC_FRAME_PREV, which
+     can be used to estimate constant velocity and constant acceleration
+     predictors.
+    Uninitialized MVs are (0,0).*/
+  oc_mv2        analysis_mv[3];
+  /*Current unrefined analysis MVs.*/
+  oc_mv         unref_mv[2];
+  /*Unrefined block MVs.*/
+  oc_mv         block_mv[4];
+  /*Refined block MVs.*/
+  oc_mv         ref_mv[4];
+  /*Minimum motion estimation error from the analysis stage.*/
+  ogg_uint16_t  error[2];
+  /*MB error for half-pel refinement for each frame type.*/
+  unsigned      satd[2];
+  /*Block error for half-pel refinement.*/
+  unsigned      block_satd[4];
+};
+
+
+
+/*State machine to estimate the opportunity cost of coding a MB mode.*/
+struct oc_mode_scheme_chooser{
+  /*Pointers to a list containing the index of each mode in the mode
+     alphabet used by each scheme.
+    The first entry points to the dynamic scheme0_ranks, while the remaining 7
+     point to the constant entries stored in OC_MODE_SCHEMES.*/
+  const unsigned char *mode_ranks[8];
+  /*The ranks for each mode when coded with scheme 0.
+    These are optimized so that the more frequent modes have lower ranks.*/
+  unsigned char        scheme0_ranks[OC_NMODES];
+  /*The list of modes, sorted in descending order of frequency, that
+    corresponds to the ranks above.*/
+  unsigned char        scheme0_list[OC_NMODES];
+  /*The number of times each mode has been chosen so far.*/
+  int                  mode_counts[OC_NMODES];
+  /*The list of mode coding schemes, sorted in ascending order of bit cost.*/
+  unsigned char        scheme_list[8];
+  /*The number of bits used by each mode coding scheme.*/
+  ptrdiff_t            scheme_bits[8];
+};
+
+
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser);
+
+
+
+/*A 2nd order low-pass Bessel follower.
+  We use this for rate control because it has fast reaction time, but is
+   critically damped.*/
+struct oc_iir_filter{
+  ogg_int32_t c[2];
+  ogg_int64_t g;
+  ogg_int32_t x[2];
+  ogg_int32_t y[2];
+};
+
+
+
+/*A linear fit for the log-domain scale factors used in 2-pass.*/
+struct oc_log_linear_fit{
+  ogg_int64_t  x;
+  ogg_int64_t  y;
+  ogg_int64_t  x2;
+  ogg_int64_t  xy;
+  ogg_uint32_t n;
+};
+
+
+
+/*The 2-pass metrics associated with a single frame.*/
+struct oc_frame_metrics{
+  ogg_int32_t   scale;
+  unsigned      dup_count:31;
+  unsigned      frame_type:1;
+};
+
+
+
+/*Rate control state information.*/
+struct oc_rc_state{
+  /*The target average bits per frame.*/
+  ogg_int64_t        bits_per_frame;
+  /*The current buffer fullness (bits available to be used).*/
+  ogg_int64_t        fullness;
+  /*The target buffer fullness.
+    This is where we'd like to be by the last keyframe that appears in the next
+     buf_delay frames.*/
+  ogg_int64_t        target;
+  /*The maximum buffer fullness (total size of the buffer).*/
+  ogg_int64_t        max;
+  /*The log of the number of pixels in a frame in Q57 format.*/
+  ogg_int64_t        log_npixels;
+  /*The exponent used in the rate model in Q8 format.*/
+  unsigned           exp[2];
+  /*The number of frames to distribute the buffer usage over.*/
+  int                buf_delay;
+  /*The total drop count from the previous frame.
+    This includes duplicates explicitly requested via the
+     TH_ENCCTL_SET_DUP_COUNT API as well as frames we chose to drop ourselves.*/
+  ogg_uint32_t       prev_drop_count;
+  /*The log of an estimated scale factor used to obtain the real framerate, for
+     VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.*/
+  ogg_int64_t        log_drop_scale;
+  /*The log of the estimated scale factor for the rate model in Q57 format.*/
+  ogg_int64_t        log_scale[2];
+  /*The log of the target quantizer level in Q57 format.*/
+  ogg_int64_t        log_qtarget;
+  /*Will we drop frames to meet bitrate target?*/
+  unsigned char      drop_frames;
+  /*Do we respect the maximum buffer fullness?*/
+  unsigned char      cap_overflow;
+  /*Can the reservoir go negative?*/
+  unsigned char      cap_underflow;
+  /*Second-order lowpass filters to track scale and VFR.*/
+  oc_iir_filter      scalefilter[2];
+  oc_iir_filter      vfrfilter;
+  /*Two-pass mode state.
+    0 => 1-pass encoding.
+    1 => 1st pass of 2-pass encoding.
+    2 => 2nd pass of 2-pass encoding.*/
+  int                twopass;
+  /*Buffer for current frame metrics.*/
+  unsigned char      twopass_buffer[48];
+  /*The number of bytes in the frame metrics buffer.
+    When 2-pass encoding is enabled, this is set to 0 after each frame is
+     submitted, and must be non-zero before the next frame will be accepted.*/
+  int                twopass_buffer_bytes;
+  int                twopass_buffer_fill;
+  /*Whether or not to force the next frame to be a keyframe.*/
+  unsigned char      twopass_force_kf;
+  /*The metrics for the previous frame.*/
+  oc_frame_metrics   prev_metrics;
+  /*The metrics for the current frame.*/
+  oc_frame_metrics   cur_metrics;
+  /*The buffered metrics for future frames.*/
+  oc_frame_metrics  *frame_metrics;
+  int                nframe_metrics;
+  int                cframe_metrics;
+  /*The index of the current frame in the circular metric buffer.*/
+  int                frame_metrics_head;
+  /*The frame count of each type (keyframes, delta frames, and dup frames);
+     32 bits limits us to 2.268 years at 60 fps.*/
+  ogg_uint32_t       frames_total[3];
+  /*The number of frames of each type yet to be processed.*/
+  ogg_uint32_t       frames_left[3];
+  /*The sum of the scale values for each frame type.*/
+  ogg_int64_t        scale_sum[2];
+  /*The start of the window over which the current scale sums are taken.*/
+  int                scale_window0;
+  /*The end of the window over which the current scale sums are taken.*/
+  int                scale_window_end;
+  /*The frame count of each type in the current 2-pass window; this does not
+     include dup frames.*/
+  int                nframes[3];
+  /*Bias correction fits for the 1st-pass scale factors.*/
+  oc_log_linear_fit  corr[2];
+};
+
+
+void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc);
+void oc_rc_state_clear(oc_rc_state *_rc);
+
+void oc_enc_rc_resize(oc_enc_ctx *_enc);
+int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp);
+void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _frame_type);
+int oc_enc_update_rc_state(oc_enc_ctx *_enc,
+ long _bits,int _qti,int _qi,int _trial,int _droppable);
+int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf);
+int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes);
+
+
+
+/*The internal encoder state.*/
+struct th_enc_ctx{
+  /*Shared encoder/decoder state.*/
+  oc_theora_state          state;
+  /*Buffer in which to assemble packets.*/
+  oggpack_buffer           opb;
+  /*Encoder-specific macro block information.*/
+  oc_mb_enc_info          *mb_info;
+  /*DC coefficients after prediction.*/
+  ogg_int16_t             *frag_dc;
+  /*The list of coded macro blocks, in coded order.*/
+  unsigned                *coded_mbis;
+  /*The number of coded macro blocks.*/
+  size_t                   ncoded_mbis;
+  /*Whether or not packets are ready to be emitted.
+    This takes on negative values while there are remaining header packets to
+     be emitted, reaches 0 when the codec is ready for input, and becomes
+     positive when a frame has been processed and data packets are ready.*/
+  int                      packet_state;
+  /*The maximum distance between keyframes.*/
+  ogg_uint32_t             keyframe_frequency_force;
+  /*The number of duplicates to produce for the next frame.*/
+  ogg_uint32_t             dup_count;
+  /*The number of duplicates remaining to be emitted for the current frame.*/
+  ogg_uint32_t             nqueued_dups;
+  /*The number of duplicates emitted for the last frame.*/
+  ogg_uint32_t             prev_dup_count;
+  /*Whether or not VP3 compatibility mode has been enabled.*/
+  unsigned char            vp3_compatible;
+  /*Whether or not any INTER frames have been coded.*/
+  unsigned char            coded_inter_frame;
+  /*Whether or not previous frame was dropped.*/
+  unsigned char            prevframe_dropped;
+  /*Stores most recently chosen Huffman tables for each frame type, DC and AC
+     coefficients, and luma and chroma tokens.
+    The actual Huffman table used for a given coefficient depends not only on
+     the choice made here, but also its index in the zig-zag ordering.*/
+  unsigned char            huff_idxs[2][2][2];
+  /*Current count of bits used by each MV coding scheme (0: VLC, 1: 6-bit).*/
+  size_t                   mv_bits[2];
+  /*The mode scheme chooser for estimating mode coding costs.*/
+  oc_mode_scheme_chooser   chooser;
+  /*The number of vertical super blocks in an MCU.*/
+  int                      mcu_nvsbs;
+  /*The SSD error for skipping each fragment in the current MCU.*/
+  unsigned                *mcu_skip_ssd;
+  /*The DCT token lists for each coefficient and each plane.*/
+  unsigned char          **dct_tokens[3];
+  /*The extra bits associated with each DCT token.*/
+  ogg_uint16_t           **extra_bits[3];
+  /*The number of DCT tokens for each coefficient for each plane.*/
+  ptrdiff_t                ndct_tokens[3][64];
+  /*Pending EOB runs for each coefficient for each plane.*/
+  ogg_uint16_t             eob_run[3][64];
+  /*The offset of the first DCT token for each coefficient for each plane.*/
+  unsigned char            dct_token_offs[3][64];
+  /*The last DC coefficient for each plane and reference frame.*/
+  int                      dc_pred_last[3][3];
+#if defined(OC_COLLECT_METRICS)
+  /*Fragment SATD statistics for MB mode estimation metrics.*/
+  unsigned                *frag_satd;
+  /*Fragment SSD statistics for MB mode estimation metrics.*/
+  unsigned                *frag_ssd;
+#endif
+  /*The R-D optimization parameter.*/
+  int                      lambda;
+  /*The Huffman tables in use.*/
+  th_huff_code             huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
+  /*The quantization parameters in use.*/
+  th_quant_info            qinfo;
+  oc_iquant               *enquant_tables[64][3][2];
+  oc_iquant_table          enquant_table_data[64][3][2];
+  /*An "average" quantizer for each quantizer type (INTRA or INTER) and qi
+     value.
+    This is used to parameterize the rate control decisions.
+    They are kept in the log domain to simplify later processing.
+    Keep in mind these are DCT domain quantizers, and so are scaled by an
+     additional factor of 4 from the pixel domain.*/
+  ogg_int64_t              log_qavg[2][64];
+  /*The buffer state used to drive rate control.*/
+  oc_rc_state              rc;
+  /*Table for encoder acceleration functions.*/
+  oc_enc_opt_vtable        opt_vtable;
+};
+
+
+void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode);
+int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode);
+#if defined(OC_COLLECT_METRICS)
+void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc);
+void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc);
+#endif
+
+
+
+/*Perform fullpel motion search for a single MB against both reference frames.*/
+void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi);
+/*Refine a MB MV for one frame.*/
+void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame);
+/*Refine the block MVs.*/
+void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi);
+
+
+
+/*Used to roll back a tokenlog transaction when we retroactively decide to skip
+   a fragment.
+  A checkpoint is taken right before each token is added.*/
+struct oc_token_checkpoint{
+  /*The color plane the token was added to.*/
+  unsigned char pli;
+  /*The zig-zag index the token was added to.*/
+  unsigned char zzi;
+  /*The outstanding EOB run count before the token was added.*/
+  ogg_uint16_t  eob_run;
+  /*The token count before the token was added.*/
+  ptrdiff_t     ndct_tokens;
+};
+
+
+
+void oc_enc_tokenize_start(oc_enc_ctx *_enc);
+int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _acmin);
+void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
+ const oc_token_checkpoint *_stack,int _n);
+void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
+ int _pli,int _fragy0,int _frag_yend);
+void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli,
+ const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis,
+ int _prev_ndct_tokens1,int _prev_eob_run1);
+void oc_enc_tokenize_finish(oc_enc_ctx *_enc);
+
+
+
+/*Utility routine to encode one of the header packets.*/
+int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
+ oggpack_buffer *_opb,const th_quant_info *_qinfo,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS],
+ const char *_vendor,th_comment *_tc,ogg_packet *_op);
+
+
+
+/*Encoder-specific accelerated functions.*/
+void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride);
+void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
+ const unsigned char *_src,int _ystride);
+void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc,
+ unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]);
+void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64],
+ const ogg_int16_t _x[64]);
+
+/*Default pure-C implementations.*/
+void oc_enc_vtable_init_c(oc_enc_ctx *_enc);
+
+void oc_enc_frag_sub_c(ogg_int16_t _diff[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride);
+void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride);
+void oc_enc_frag_copy2_c(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+unsigned oc_enc_frag_sad_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride);
+void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+
+#endif

Copied: trunk/theora/lib/encode.c (from rev 16442, trunk/theora/lib/enc/encode.c)
===================================================================
--- trunk/theora/lib/encode.c	                        (rev 0)
+++ trunk/theora/lib/encode.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,1621 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+#if defined(OC_X86_ASM)
+# include "x86/x86enc.h"
+#endif
+
+
+
+/*VP3.1's default quantization parameters: one range (size 63), two bases.*/
+static const int OC_VP31_RANGE_SIZES[1]={63};
+static const th_quant_base OC_VP31_BASES_INTRA_Y[2]={
+  {
+     16, 11, 10, 16, 24, 40, 51, 61,
+     12, 12, 14, 19, 26, 58, 60, 55,
+     14, 13, 16, 24, 40, 57, 69, 56,
+     14, 17, 22, 29, 51, 87, 80, 62,
+     18, 22, 37, 58, 68,109,103, 77,
+     24, 35, 55, 64, 81,104,113, 92,
+     49, 64, 78, 87,103,121,120,101,
+     72, 92, 95, 98,112,100,103, 99
+  },
+  {
+     16, 11, 10, 16, 24, 40, 51, 61,
+     12, 12, 14, 19, 26, 58, 60, 55,
+     14, 13, 16, 24, 40, 57, 69, 56,
+     14, 17, 22, 29, 51, 87, 80, 62,
+     18, 22, 37, 58, 68,109,103, 77,
+     24, 35, 55, 64, 81,104,113, 92,
+     49, 64, 78, 87,103,121,120,101,
+     72, 92, 95, 98,112,100,103, 99
+  }
+};
+static const th_quant_base OC_VP31_BASES_INTRA_C[2]={
+  {
+     17, 18, 24, 47, 99, 99, 99, 99,
+     18, 21, 26, 66, 99, 99, 99, 99,
+     24, 26, 56, 99, 99, 99, 99, 99,
+     47, 66, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99
+  },
+  {
+     17, 18, 24, 47, 99, 99, 99, 99,
+     18, 21, 26, 66, 99, 99, 99, 99,
+     24, 26, 56, 99, 99, 99, 99, 99,
+     47, 66, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99
+  }
+};
+static const th_quant_base OC_VP31_BASES_INTER[2]={
+  {
+     16, 16, 16, 20, 24, 28, 32, 40,
+     16, 16, 20, 24, 28, 32, 40, 48,
+     16, 20, 24, 28, 32, 40, 48, 64,
+     20, 24, 28, 32, 40, 48, 64, 64,
+     24, 28, 32, 40, 48, 64, 64, 64,
+     28, 32, 40, 48, 64, 64, 64, 96,
+     32, 40, 48, 64, 64, 64, 96,128,
+     40, 48, 64, 64, 64, 96,128,128
+  },
+  {
+     16, 16, 16, 20, 24, 28, 32, 40,
+     16, 16, 20, 24, 28, 32, 40, 48,
+     16, 20, 24, 28, 32, 40, 48, 64,
+     20, 24, 28, 32, 40, 48, 64, 64,
+     24, 28, 32, 40, 48, 64, 64, 64,
+     28, 32, 40, 48, 64, 64, 64, 96,
+     32, 40, 48, 64, 64, 64, 96,128,
+     40, 48, 64, 64, 64, 96,128,128
+  }
+};
+
+const th_quant_info TH_VP31_QUANT_INFO={ /*The VP3.1 parameter set.*/
+  { /*First 64-entry table; presumably th_quant_info.dc_scale (verify).*/
+    220,200,190,180,170,170,160,160,
+    150,150,140,140,130,130,120,120,
+    110,110,100,100, 90, 90, 90, 80,
+     80, 80, 70, 70, 70, 60, 60, 60,
+     60, 50, 50, 50, 50, 40, 40, 40,
+     40, 40, 30, 30, 30, 30, 30, 30,
+     30, 20, 20, 20, 20, 20, 20, 20,
+     20, 10, 10, 10, 10, 10, 10, 10
+  },
+  { /*Second 64-entry table; presumably th_quant_info.ac_scale (verify).*/
+    500,450,400,370,340,310,285,265,
+    245,225,210,195,185,180,170,160,
+    150,145,135,130,125,115,110,107,
+    100, 96, 93, 89, 85, 82, 75, 74,
+     70, 68, 64, 60, 57, 56, 52, 50,
+     49, 45, 44, 43, 40, 38, 37, 35,
+     33, 32, 30, 29, 28, 25, 24, 22,
+     21, 19, 18, 17, 15, 13, 12, 10
+  },
+  { /*Third 64-entry table; presumably the loop filter limits (verify).*/
+    30,25,20,20,15,15,14,14,
+    13,13,12,12,11,11,10,10,
+     9, 9, 8, 8, 7, 7, 7, 7,
+     6, 6, 6, 6, 5, 5, 5, 5,
+     4, 4, 4, 4, 3, 3, 3, 3,
+     2, 2, 2, 2, 2, 2, 2, 2,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0
+  },
+  { /*Per-entry {range count, range sizes, base matrices} triples.*/
+    { /*INTRA: the Y base tables, then the C base tables used twice.*/
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_Y},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C}
+    },
+    { /*INTER: the same base tables for all three entries.*/
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}
+    }
+  }
+};
+
+/*The current default quantization parameters: 3 ranges (32,16,15), 4 bases.*/
+static const int OC_DEF_QRANGE_SIZES[3]={32,16,15};
+static const th_quant_base OC_DEF_BASES_INTRA_Y[4]={
+  {
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+  },
+  {
+     15, 12, 12, 15, 18, 20, 20, 21,
+     13, 13, 14, 17, 18, 21, 21, 20,
+     14, 14, 15, 18, 20, 21, 21, 21,
+     14, 16, 17, 19, 20, 21, 21, 21,
+     16, 17, 20, 21, 21, 21, 21, 21,
+     18, 19, 20, 21, 21, 21, 21, 21,
+     20, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21
+  },
+  {
+     16, 12, 11, 16, 20, 25, 27, 28,
+     13, 13, 14, 18, 21, 28, 28, 27,
+     14, 13, 16, 20, 25, 28, 28, 28,
+     14, 16, 19, 22, 27, 29, 29, 28,
+     17, 19, 25, 28, 28, 30, 30, 29,
+     20, 24, 27, 28, 29, 30, 30, 29,
+     27, 28, 29, 29, 30, 30, 30, 30,
+     29, 29, 29, 29, 30, 30, 30, 29
+  },
+  {
+     16, 11, 10, 16, 24, 40, 51, 61,
+     12, 12, 14, 19, 26, 58, 60, 55,
+     14, 13, 16, 24, 40, 57, 69, 56,
+     14, 17, 22, 29, 51, 87, 80, 62,
+     18, 22, 37, 58, 68,109,103, 77,
+     24, 35, 55, 64, 81,104,113, 92,
+     49, 64, 78, 87,103,121,120,101,
+     72, 92, 95, 98,112,100,103, 99
+  }
+};
+static const th_quant_base OC_DEF_BASES_INTRA_C[4]={
+  {
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19
+  },
+  {
+     18, 18, 21, 25, 26, 26, 26, 26,
+     18, 20, 22, 26, 26, 26, 26, 26,
+     21, 22, 25, 26, 26, 26, 26, 26,
+     25, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26
+  },
+  {
+     17, 18, 22, 31, 36, 36, 36, 36,
+     18, 20, 24, 34, 36, 36, 36, 36,
+     22, 24, 33, 36, 36, 36, 36, 36,
+     31, 34, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36
+  },
+  {
+     17, 18, 24, 47, 99, 99, 99, 99,
+     18, 21, 26, 66, 99, 99, 99, 99,
+     24, 26, 56, 99, 99, 99, 99, 99,
+     47, 66, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99
+  }
+};
+static const th_quant_base OC_DEF_BASES_INTER[4]={
+  {
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21
+  },
+  {
+     18, 18, 18, 21, 23, 24, 25, 27,
+     18, 18, 21, 23, 24, 25, 27, 28,
+     18, 21, 23, 24, 25, 27, 28, 29,
+     21, 23, 24, 25, 27, 28, 29, 29,
+     23, 24, 25, 27, 28, 29, 29, 29,
+     24, 25, 27, 28, 29, 29, 29, 30,
+     25, 27, 28, 29, 29, 29, 30, 30,
+     27, 28, 29, 29, 29, 30, 30, 30
+  },
+  {
+     17, 17, 17, 20, 23, 26, 28, 32,
+     17, 17, 20, 23, 26, 28, 32, 34,
+     17, 20, 23, 26, 28, 32, 34, 37,
+     20, 23, 26, 28, 32, 34, 37, 37,
+     23, 26, 28, 32, 34, 37, 37, 37,
+     26, 28, 32, 34, 37, 37, 37, 41,
+     28, 32, 34, 37, 37, 37, 41, 42,
+     32, 34, 37, 37, 37, 41, 42, 42
+  },
+  {
+     16, 16, 16, 20, 24, 28, 32, 40,
+     16, 16, 20, 24, 28, 32, 40, 48,
+     16, 20, 24, 28, 32, 40, 48, 64,
+     20, 24, 28, 32, 40, 48, 64, 64,
+     24, 28, 32, 40, 48, 64, 64, 64,
+     28, 32, 40, 48, 64, 64, 64, 96,
+     32, 40, 48, 64, 64, 64, 96,128,
+     40, 48, 64, 64, 64, 96,128,128
+  }
+};
+
+const th_quant_info TH_DEF_QUANT_INFO={ /*The current default parameter set.*/
+  { /*First 64-entry table; presumably th_quant_info.dc_scale (verify).*/
+    365,348,333,316,300,287,277,265,
+    252,240,229,219,206,197,189,180,
+    171,168,160,153,146,139,132,127,
+    121,115,110,107,101, 97, 94, 89,
+     85, 83, 78, 73, 72, 67, 66, 62,
+     60, 59, 56, 53, 52, 48, 47, 43,
+     42, 40, 36, 35, 34, 33, 31, 30,
+     28, 25, 24, 22, 20, 17, 14, 10
+  },
+  { /*Second 64-entry table (same values as the first); presumably ac_scale.*/
+    365,348,333,316,300,287,277,265,
+    252,240,229,219,206,197,189,180,
+    171,168,160,153,146,139,132,127,
+    121,115,110,107,101, 97, 94, 89,
+     85, 83, 78, 73, 72, 67, 66, 62,
+     60, 59, 56, 53, 52, 48, 47, 43,
+     42, 40, 36, 35, 34, 33, 31, 30,
+     28, 25, 24, 22, 20, 17, 14, 10
+  },
+  { /*Third 64-entry table; presumably the loop filter limits (verify).*/
+    30,25,20,20,15,15,14,14,
+    13,13,12,12,11,11,10,10,
+     9, 9, 8, 8, 7, 7, 7, 7,
+     6, 6, 6, 6, 5, 5, 5, 5,
+     4, 4, 4, 4, 3, 3, 3, 3,
+     2, 2, 2, 2, 2, 2, 2, 2,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0
+  },
+  { /*Per-entry {range count, range sizes, base matrices} triples.*/
+    { /*INTRA: the Y base tables, then the C base tables used twice.*/
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_Y},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C}
+    },
+    { /*INTER: the same base tables for all three entries.*/
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER}
+    }
+  }
+};
+
+
+
+/*The Huffman codes used for macro block modes.
+  Both tables are indexed by [codebook][rank of the mode].*/
+const unsigned char OC_MODE_BITS[2][OC_NMODES]={
+  /*Codebook 0: a maximally skewed prefix code (lengths in bits).*/
+  {1,2,3,4,5,6,7,7},
+  /*Codebook 1: a fixed-length code (lengths in bits).*/
+  {3,3,3,3,3,3,3,3}
+};
+
+static const unsigned char OC_MODE_CODES[2][OC_NMODES]={
+  /*Codebook 0: a maximally skewed prefix code (the codewords).*/
+  {0x00,0x02,0x06,0x0E,0x1E,0x3E,0x7E,0x7F},
+  /*Codebook 1: a fixed-length code (the codewords).*/
+  {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07}
+};
+
+
+/*The Huffman codes used for motion vectors, indexed by [scheme][component+31].
+  Only 63 entries are listed per codebook, so element 63 is implicitly zero.*/
+const unsigned char OC_MV_BITS[2][64]={
+  /*Codebook 0: VLC code.*/
+  {
+      8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+    8,7,7,7,7,7,7,7,7,6,6,6,6,4,4,3,
+    3,
+    3,4,4,6,6,6,6,7,7,7,7,7,7,7,7,8,
+    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+  },
+  /*Codebook 1: (5 bit magnitude, 1 bit sign).
+    This wastes a code word (0x01, negative zero), or a bit (0x00, positive
+     zero, requires only 5 bits to uniquely decode), but is hopefully not used
+     very often.*/
+  {
+      6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    6,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
+  }
+};
+
+static const unsigned char OC_MV_CODES[2][64]={
+  /*Codebook 0: VLC code.*/
+  {
+         0xFF,0xFD,0xFB,0xF9,0xF7,0xF5,0xF3,
+    0xF1,0xEF,0xED,0xEB,0xE9,0xE7,0xE5,0xE3,
+    0xE1,0x6F,0x6D,0x6B,0x69,0x67,0x65,0x63,
+    0x61,0x2F,0x2D,0x2B,0x29,0x09,0x07,0x02,
+    0x00,
+    0x01,0x06,0x08,0x28,0x2A,0x2C,0x2E,0x60,
+    0x62,0x64,0x66,0x68,0x6A,0x6C,0x6E,0xE0,
+    0xE2,0xE4,0xE6,0xE8,0xEA,0xEC,0xEE,0xF0,
+    0xF2,0xF4,0xF6,0xF8,0xFA,0xFC,0xFE
+  },
+  /*Codebook 1: (5 bit magnitude, 1 bit sign).*/
+  {
+         0x3F,0x3D,0x3B,0x39,0x37,0x35,0x33,
+    0x31,0x2F,0x2D,0x2B,0x29,0x27,0x25,0x23,
+    0x21,0x1F,0x1D,0x1B,0x19,0x17,0x15,0x13,
+    0x11,0x0F,0x0D,0x0B,0x09,0x07,0x05,0x03,
+    0x00,
+    0x02,0x04,0x06,0x08,0x0A,0x0C,0x0E,0x10,
+    0x12,0x14,0x16,0x18,0x1A,0x1C,0x1E,0x20,
+    0x22,0x24,0x26,0x28,0x2A,0x2C,0x2E,0x30,
+    0x32,0x34,0x36,0x38,0x3A,0x3C,0x3E
+  }
+};
+
+
+
+/*Super block run coding scheme (the x bits hold the offset into the range):
+   Codeword             Run Length
+   0                       1
+   10x                     2-3
+   110x                    4-5
+   1110xx                  6-9
+   11110xxx                10-17
+   111110xxxx              18-33
+   111111xxxxxxxxxxxx      34-4129 (4130 below just bounds this range)*/
+const ogg_uint16_t    OC_SB_RUN_VAL_MIN[8]={1,2,4,6,10,18,34,4130};
+static const unsigned OC_SB_RUN_CODE_PREFIX[7]={
+  0,4,0xC,0x38,0xF0,0x3E0,0x3F000
+};
+const unsigned char   OC_SB_RUN_CODE_NBITS[7]={1,3,4,6,8,10,18};
+
+
+/*Writes the codeword(s) for a super block run length to an oggpack_buffer.
+  _opb:       The buffer to write to.
+  _run_count: The length of the run, which must be positive.
+  _flag:      The flag value shared by every entry in this run.
+  _done:      Non-zero if this is the last run, i.e., no more flags follow.*/
+static void oc_sb_run_pack(oggpack_buffer *_opb,ptrdiff_t _run_count,
+ int _flag,int _done){
+  int ri;
+  /*Longer runs are split; a flag bit follows each maximum-length codeword.*/
+  while(_run_count>=4129){
+    oggpackB_write(_opb,0x3FFFF,18);
+    _run_count-=4129;
+    if(_run_count>0)oggpackB_write(_opb,_flag,1);
+    else{
+      if(!_done)oggpackB_write(_opb,!_flag,1);
+      return;
+    }
+  }
+  for(ri=0;_run_count>=OC_SB_RUN_VAL_MIN[ri+1];ri++);
+  oggpackB_write(_opb,
+   OC_SB_RUN_CODE_PREFIX[ri]+_run_count-OC_SB_RUN_VAL_MIN[ri],
+   OC_SB_RUN_CODE_NBITS[ri]);
+}
+
+
+
+/*Block run coding scheme (tables below are indexed by run length minus 1):
+   Codeword             Run Length
+   0x                      1-2
+   10x                     3-4
+   110x                    5-6
+   1110xx                  7-10
+   11110xx                 11-14
+   11111xxxx               15-30*/
+const unsigned char OC_BLOCK_RUN_CODE_NBITS[30]={
+  2,2,3,3,4,4,6,6,6,6,7,7,7,7,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9
+};
+static const ogg_uint16_t  OC_BLOCK_RUN_CODE_PATTERN[30]={
+        0x000,0x001,0x004,0x005,0x00C,0x00D,0x038,
+  0x039,0x03A,0x03B,0x078,0x079,0x07A,0x07B,0x1F0,
+  0x1F1,0x1F2,0x1F3,0x1F4,0x1F5,0x1F6,0x1F7,0x1F8,
+  0x1F9,0x1FA,0x1FB,0x1FC,0x1FD,0x1FE,0x1FF
+};
+
+
+/*Writes the codeword for a block run of the given length to an oggpack_buffer.
+  _opb:       The buffer to write to.
+  _run_count: The run length, from 1 to 30; it indexes the tables (minus one).*/
+static void oc_block_run_pack(oggpack_buffer *_opb,int _run_count){
+  int codei;
+  codei=_run_count-1;
+  oggpackB_write(_opb,OC_BLOCK_RUN_CODE_PATTERN[codei],
+   OC_BLOCK_RUN_CODE_NBITS[codei]);
+}
+
+
+
+/*Writes the frame header: the data packet marker, the frame type, the qi list,
+   and, for key frames only, 3 reserved bits.*/
+static void oc_enc_frame_header_pack(oc_enc_ctx *_enc){
+  oggpack_buffer *opb;
+  int             nqis;
+  int             qii;
+  opb=&_enc->opb;
+  /*A 0 bit marks this as a data packet.*/
+  oggpackB_write(opb,0,1);
+  /*The frame type (key frame or delta frame) follows.*/
+  oggpackB_write(opb,_enc->state.frame_type,1);
+  /*The qi list: every 6-bit entry after the first is preceded by a 1 bit, and
+     a list with fewer than 3 entries is terminated by a 0 bit.*/
+  oggpackB_write(opb,_enc->state.qis[0],6);
+  nqis=_enc->state.nqis;
+  if(nqis>3)nqis=3;
+  for(qii=1;qii<nqis;qii++){
+    oggpackB_write(opb,1,1);
+    oggpackB_write(opb,_enc->state.qis[qii],6);
+  }
+  if(nqis<3)oggpackB_write(opb,0,1);
+  /*Key frames carry 3 unused configuration bits, holdovers from VP3; they are
+     always written as 0.*/
+  if(_enc->state.frame_type==OC_INTRA_FRAME)oggpackB_write(opb,0,3);
+}
+
+/*Writes the run-length coded flags marking which super blocks are partially
+   coded; the flag value alternates from one run to the next.
+  Return: The number of partially coded super blocks.*/
+static unsigned oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc){
+  const oc_sb_flags *sb_flags;
+  unsigned           nsbs;
+  unsigned           sbi;
+  unsigned           npartial;
+  int                cur_flag;
+  sb_flags=_enc->state.sb_flags;
+  nsbs=_enc->state.nsbs;
+  /*The value of the very first flag is stored explicitly.*/
+  cur_flag=sb_flags[0].coded_partially;
+  oggpackB_write(&_enc->opb,cur_flag,1);
+  npartial=0;
+  sbi=0;
+  do{
+    unsigned run_start;
+    /*Advance to the end of the run of SBs whose flag matches cur_flag.*/
+    run_start=sbi;
+    while(sbi<nsbs&&sb_flags[sbi].coded_partially==cur_flag)sbi++;
+    /*Each SB in the run adds the flag value to the partial count.*/
+    npartial+=cur_flag*(sbi-run_start);
+    oc_sb_run_pack(&_enc->opb,sbi-run_start,cur_flag,sbi>=nsbs);
+    cur_flag=!cur_flag;
+  }
+  while(sbi<nsbs);
+  return npartial;
+}
+
+/*Writes the coded/not coded flags for each super block that is not partially
+   coded, run-length encoded with the flag value alternating between runs.
+  The caller must ensure at least one super block is not partially coded.*/
+static void oc_enc_coded_sb_flags_pack(oc_enc_ctx *_enc){
+  const oc_sb_flags *sb_flags;
+  unsigned           nsbs;
+  unsigned           sbi;
+  int                cur_flag;
+  sb_flags=_enc->state.sb_flags;
+  nsbs=_enc->state.nsbs;
+  /*Find the first SB that is not partially coded; the flags of partially
+     coded SBs have already been written, so they are skipped throughout.*/
+  sbi=0;
+  while(sb_flags[sbi].coded_partially)sbi++;
+  cur_flag=sb_flags[sbi].coded_fully;
+  oggpackB_write(&_enc->opb,cur_flag,1);
+  do{
+    unsigned run_len;
+    for(run_len=0;sbi<nsbs;sbi++)if(!sb_flags[sbi].coded_partially){
+      if(sb_flags[sbi].coded_fully!=cur_flag)break;
+      run_len++;
+    }
+    oc_sb_run_pack(&_enc->opb,run_len,cur_flag,sbi>=nsbs);
+    cur_flag=!cur_flag;
+  }
+  while(sbi<nsbs);
+}
+
+/*Writes the coded flags for the frame: the partially coded SB flags, then the
+   fully coded SB flags (if any SB is not partially coded), and finally the
+   run-length coded flags of the individual blocks in partially coded SBs.*/
+static void oc_enc_coded_flags_pack(oc_enc_ctx *_enc){
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  const oc_fragment *frags;
+  unsigned           nsbs;
+  unsigned           sbi;
+  int                run_len;
+  int                cur_flag;
+  int                pli;
+  if(oc_enc_partial_sb_flags_pack(_enc)<_enc->state.nsbs){
+    oc_enc_coded_sb_flags_pack(_enc);
+  }
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  sb_flags=_enc->state.sb_flags;
+  frags=_enc->state.frags;
+  nsbs=_enc->state.nsbs;
+  /*Look for the first partially coded SB.*/
+  sbi=0;
+  while(sbi<nsbs&&!sb_flags[sbi].coded_partially)sbi++;
+  /*Individual block flags are only stored if there is at least one.*/
+  if(sbi>=nsbs)return;
+  /*The first flag is stored explicitly; runs of alternating flags follow.*/
+  cur_flag=frags[sb_maps[sbi][0][0]].coded;
+  oggpackB_write(&_enc->opb,cur_flag,1);
+  run_len=0;
+  /*Visit the SBs of all three planes, in order.*/
+  nsbs=0;
+  for(pli=0;pli<3;pli++)nsbs+=_enc->state.fplanes[pli].nsbs;
+  for(sbi=0;sbi<nsbs;sbi++){
+    int blki;
+    if(!sb_flags[sbi].coded_partially)continue;
+    for(blki=0;blki<16;blki++){
+      ptrdiff_t fragi;
+      fragi=sb_maps[sbi][blki>>2][blki&3];
+      /*Negative map entries do not refer to a fragment and are skipped.*/
+      if(fragi<0)continue;
+      if(frags[fragi].coded==cur_flag)run_len++;
+      else{
+        oc_block_run_pack(&_enc->opb,run_len);
+        cur_flag=!cur_flag;
+        run_len=1;
+      }
+    }
+  }
+  /*Write out the final, still-open run.*/
+  if(run_len>0)oc_block_run_pack(&_enc->opb,run_len);
+}
+
+/*Writes the macro block mode coding scheme, followed by the mode of each coded
+   macro block as the codeword for that mode's rank under the scheme.*/
+static void oc_enc_mb_modes_pack(oc_enc_ctx *_enc){
+  const unsigned char *ranks;
+  const unsigned      *coded_mbis;
+  size_t               ncoded_mbis;
+  const signed char   *mb_modes;
+  unsigned             mbii;
+  int                  scheme;
+  int                  codebook;
+  int                  modei;
+  /*The first entry of the chooser's scheme list is the one that is coded.*/
+  scheme=_enc->chooser.scheme_list[0];
+  oggpackB_write(&_enc->opb,scheme,3);
+  /*For scheme 0, the mode frequency ordering is sent explicitly.*/
+  if(scheme==0)for(modei=0;modei<OC_NMODES;modei++){
+    oggpackB_write(&_enc->opb,_enc->chooser.scheme0_ranks[modei],3);
+  }
+  /*Of the 3-bit scheme values, only 7 selects codebook 1 (fixed-length).*/
+  codebook=(scheme+1)>>3;
+  ranks=_enc->chooser.mode_ranks[scheme];
+  coded_mbis=_enc->coded_mbis;
+  ncoded_mbis=_enc->ncoded_mbis;
+  mb_modes=_enc->state.mb_modes;
+  for(mbii=0;mbii<ncoded_mbis;mbii++){
+    int rank;
+    rank=ranks[mb_modes[coded_mbis[mbii]]];
+    oggpackB_write(&_enc->opb,
+     OC_MODE_CODES[codebook][rank],OC_MODE_BITS[codebook][rank]);
+  }
+}
+
+/*Writes a single motion vector, X component first, with codebook _mv_scheme.
+  _dx,_dy: The MV components, biased by 31 to form the table index.*/
+static void oc_enc_mv_pack(oc_enc_ctx *_enc,int _mv_scheme,int _dx,int _dy){
+  const unsigned char *codes;
+  const unsigned char *nbits;
+  codes=OC_MV_CODES[_mv_scheme];
+  nbits=OC_MV_BITS[_mv_scheme];
+  oggpackB_write(&_enc->opb,codes[_dx+31],nbits[_dx+31]);
+  oggpackB_write(&_enc->opb,codes[_dy+31],nbits[_dy+31]);
+}
+
+/*Writes the MV coding scheme and the motion vectors of all coded macro blocks
+   whose mode carries explicit MVs.*/
+static void oc_enc_mvs_pack(oc_enc_ctx *_enc){
+  const unsigned     *coded_mbis;
+  size_t              ncoded_mbis;
+  const oc_mb_map    *mb_maps;
+  const signed char  *mb_modes;
+  const oc_fragment  *frags;
+  const oc_mv        *frag_mvs;
+  unsigned            mbii;
+  int                 mv_scheme;
+  /*Use codebook 1 only if it needs strictly fewer bits than codebook 0.*/
+  mv_scheme=_enc->mv_bits[1]<_enc->mv_bits[0];
+  oggpackB_write(&_enc->opb,mv_scheme,1);
+  /*Macro blocks are visited in the order of the coded list (Hilbert scan
+     order), while the MVs inside a macro block are written in raster order.
+    Only the blocks in plane 0 of the macro block map are examined.*/
+  coded_mbis=_enc->coded_mbis;
+  ncoded_mbis=_enc->ncoded_mbis;
+  mb_modes=_enc->state.mb_modes;
+  mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+  frags=_enc->state.frags;
+  frag_mvs=(const oc_mv *)_enc->state.frag_mvs;
+  for(mbii=0;mbii<ncoded_mbis;mbii++){
+    ptrdiff_t fragi;
+    unsigned  mbi;
+    int       mb_mode;
+    int       bi;
+    mbi=coded_mbis[mbii];
+    mb_mode=mb_modes[mbi];
+    if(mb_mode==OC_MODE_INTER_MV||mb_mode==OC_MODE_GOLDEN_MV){
+      /*A single MV is coded: that of the first coded block.
+        The search is unbounded, so the MB must contain a coded block.*/
+      bi=0;
+      while(!frags[mb_maps[mbi][0][bi]].coded)bi++;
+      fragi=mb_maps[mbi][0][bi];
+      oc_enc_mv_pack(_enc,mv_scheme,frag_mvs[fragi][0],frag_mvs[fragi][1]);
+    }
+    else if(mb_mode==OC_MODE_INTER_MV_FOUR){
+      /*One MV is coded for every coded block of the macro block.*/
+      for(bi=0;bi<4;bi++){
+        fragi=mb_maps[mbi][0][bi];
+        if(!frags[fragi].coded)continue;
+        oc_enc_mv_pack(_enc,mv_scheme,frag_mvs[fragi][0],frag_mvs[fragi][1]);
+      }
+    }
+  }
+}
+
+/*Writes the per-block quantizer index selection (qii) of every coded fragment
+   as one or two run-length coded flag lists.
+  Nothing is written when the frame uses at most one quantizer index or has no
+   coded fragments.*/
+static void oc_enc_block_qis_pack(oc_enc_ctx *_enc){
+  const oc_fragment *frags;
+  ptrdiff_t         *coded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          fragii;
+  ptrdiff_t          run_count;
+  ptrdiff_t          nqi0;
+  int                flag;
+  if(_enc->state.nqis<=1)return;
+  ncoded_fragis=_enc->state.ntotal_coded_fragis;
+  if(ncoded_fragis<=0)return;
+  coded_fragis=_enc->state.coded_fragis;
+  frags=_enc->state.frags;
+  /*First list: one flag per coded fragment, set when qii is non-zero.
+    Only the flag of the first run is written explicitly; every later run
+     simply toggles it.*/
+  flag=!!frags[coded_fragis[0]].qii;
+  oggpackB_write(&_enc->opb,flag,1);
+  /*Counts the fragments whose qii is zero.*/
+  nqi0=0;
+  for(fragii=0;fragii<ncoded_fragis;){
+    /*Measure the run of consecutive fragments that share the current flag.*/
+    for(run_count=0;fragii<ncoded_fragis;fragii++){
+      if(!!frags[coded_fragis[fragii]].qii!=flag)break;
+      run_count++;
+      nqi0+=!flag;
+    }
+    /*The last argument is non-zero when this run reached the end of the list
+       (presumably so the run packer can treat the final run specially;
+       confirm against oc_sb_run_pack).*/
+    oc_sb_run_pack(&_enc->opb,run_count,flag,fragii>=ncoded_fragis);
+    flag=!flag;
+  }
+  /*The second list is only needed with three or more quantizer indices, and
+     only if some fragment has a non-zero qii.*/
+  if(_enc->state.nqis<3||nqi0>=ncoded_fragis)return;
+  /*Find the first fragment with a non-zero qii.
+    The check above guarantees one exists, so this search terminates.*/
+  for(fragii=0;!frags[coded_fragis[fragii]].qii;fragii++);
+  /*Second list: for fragments with a non-zero qii only, the flag is qii-1.*/
+  flag=frags[coded_fragis[fragii]].qii-1;
+  oggpackB_write(&_enc->opb,flag,1);
+  while(fragii<ncoded_fragis){
+    for(run_count=0;fragii<ncoded_fragis;fragii++){
+      int qii;
+      qii=frags[coded_fragis[fragii]].qii;
+      /*Fragments with qii==0 were fully described by the first list and do
+         not take part in these runs.*/
+      if(!qii)continue;
+      if(qii-1!=flag)break;
+      run_count++;
+    }
+    oc_sb_run_pack(&_enc->opb,run_count,flag,fragii>=ncoded_fragis);
+    flag=!flag;
+  }
+}
+
+/*Builds histograms of the DCT tokens pending in a range of zig-zag indices.
+  Tokens from plane 0 are tallied into one histogram; tokens from planes 1 and
+   2 share the other.
+  Only tokens from each list's current offset onwards are counted.
+  _zzi_start:      The first zig-zag index to include.
+  _zzi_end:        The first zig-zag index to not include.
+  _token_counts_y: Returns the token counts for the Y' plane.
+  _token_counts_c: Returns the token counts for the Cb and Cr planes.*/
+static void oc_enc_count_tokens(oc_enc_ctx *_enc,int _zzi_start,int _zzi_end,
+ ptrdiff_t _token_counts_y[32],ptrdiff_t _token_counts_c[32]){
+  int pli;
+  memset(_token_counts_y,0,32*sizeof(*_token_counts_y));
+  memset(_token_counts_c,0,32*sizeof(*_token_counts_c));
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t *counts;
+    int        zzi;
+    /*Pick the histogram this plane contributes to.*/
+    counts=pli==0?_token_counts_y:_token_counts_c;
+    for(zzi=_zzi_start;zzi<_zzi_end;zzi++){
+      const unsigned char *tokens;
+      ptrdiff_t            ntokens;
+      ptrdiff_t            ti;
+      tokens=_enc->dct_tokens[pli][zzi];
+      ntokens=_enc->ndct_tokens[pli][zzi];
+      for(ti=_enc->dct_token_offs[pli][zzi];ti<ntokens;ti++){
+        counts[tokens[ti]]++;
+      }
+    }
+  }
+}
+
+/*Accumulates, for each of the 16 candidate Huffman codes of one group, the
+   number of bits needed to code the given token histogram.
+  The costs are added to whatever _bit_counts already holds, so the caller
+   must clear it before the first call.
+  _hgi:          The Huffman group; its 16 tables start at index _hgi*16.
+  _token_counts: The number of occurrences of each of the 32 tokens.
+  _bit_counts:   The running bit totals, one per candidate table.*/
+static void oc_enc_count_bits(oc_enc_ctx *_enc,int _hgi,
+ const ptrdiff_t _token_counts[32],size_t _bit_counts[16]){
+  int first_table;
+  int huffi;
+  first_table=_hgi<<4;
+  huffi=0;
+  while(huffi<16){
+    const th_huff_code *codes;
+    int                 token;
+    codes=_enc->huff_codes[first_table+huffi];
+    for(token=0;token<32;token++){
+      _bit_counts[huffi]+=_token_counts[token]*codes[token].nbits;
+    }
+    huffi++;
+  }
+}
+
+/*Picks the cheapest of 16 candidate Huffman tables.
+  _bit_counts: The total bit cost of each candidate.
+  Return: The index of the smallest cost; the lowest index wins ties.*/
+static int oc_select_huff_idx(size_t _bit_counts[16]){
+  size_t min_bits;
+  int    min_huffi;
+  int    huffi;
+  min_huffi=0;
+  min_bits=_bit_counts[0];
+  for(huffi=1;huffi<16;huffi++){
+    /*Only a strictly smaller cost displaces the current best.*/
+    if(_bit_counts[huffi]>=min_bits)continue;
+    min_bits=_bit_counts[huffi];
+    min_huffi=huffi;
+  }
+  return min_huffi;
+}
+
+/*Writes the pending tokens, and any extra bits they carry, for a range of
+   zig-zag indices.
+  For each zig-zag index the three planes are written in order.
+  _zzi_start: The first zig-zag index to write.
+  _zzi_end:   The first zig-zag index to not write.
+  _huff_idxs: The Huffman table to use for plane 0 ([0]) and for planes 1 and
+               2 ([1]).*/
+static void oc_enc_huff_group_pack(oc_enc_ctx *_enc,
+ int _zzi_start,int _zzi_end,const int _huff_idxs[2]){
+  int zzi;
+  int pli;
+  for(zzi=_zzi_start;zzi<_zzi_end;zzi++)for(pli=0;pli<3;pli++){
+    const th_huff_code  *codes;
+    const unsigned char *tokens;
+    const ogg_uint16_t  *ebits;
+    ptrdiff_t            ntokens;
+    ptrdiff_t            ti;
+    tokens=_enc->dct_tokens[pli][zzi];
+    ebits=_enc->extra_bits[pli][zzi];
+    ntokens=_enc->ndct_tokens[pli][zzi];
+    /*Plane 0 uses the first table; planes 1 and 2 share the second.*/
+    codes=_enc->huff_codes[_huff_idxs[pli>0]];
+    for(ti=_enc->dct_token_offs[pli][zzi];ti<ntokens;ti++){
+      const th_huff_code *code;
+      int                 token;
+      int                 neb;
+      token=tokens[ti];
+      code=codes+token;
+      oggpackB_write(&_enc->opb,code->pattern,code->nbits);
+      /*Some tokens are followed by a fixed number of raw extra bits.*/
+      neb=OC_DCT_TOKEN_EXTRA_BITS[token];
+      if(neb!=0)oggpackB_write(&_enc->opb,ebits[ti],neb);
+    }
+  }
+}
+
+/*Chooses Huffman tables for the residual tokens and writes the tokens out.
+  The DC list (zig-zag index 0) gets its own pair of tables.
+  The four AC groups share one pair of 4-bit table choices, selected by the
+   total cost over all four groups; each group then uses the table at that
+   choice plus 16 times the group number.
+  The choices are also recorded in _enc->huff_idxs for the current frame
+   type.*/
+static void oc_enc_residual_tokens_pack(oc_enc_ctx *_enc){
+  /*GROUP_START[hgi] is the first zig-zag index of group hgi, and GROUP_END[hgi]
+     is one past its last.*/
+  static const unsigned char  GROUP_START[6]={0,1,6,15,28,64};
+  static const unsigned char *GROUP_END=GROUP_START+1;
+  ptrdiff_t counts_y[32];
+  ptrdiff_t counts_c[32];
+  size_t    cost_y[16];
+  size_t    cost_c[16];
+  int       choice[2];
+  int       frame_type;
+  int       hgi;
+  frame_type=_enc->state.frame_type;
+  /*DC tokens: cost every candidate table, then keep the cheapest.*/
+  memset(cost_y,0,sizeof(cost_y));
+  memset(cost_c,0,sizeof(cost_c));
+  oc_enc_count_tokens(_enc,0,1,counts_y,counts_c);
+  oc_enc_count_bits(_enc,0,counts_y,cost_y);
+  oc_enc_count_bits(_enc,0,counts_c,cost_c);
+  choice[0]=oc_select_huff_idx(cost_y);
+  choice[1]=oc_select_huff_idx(cost_c);
+  /*Signal the DC choices, remember them, and write the DC tokens.*/
+  oggpackB_write(&_enc->opb,choice[0],4);
+  oggpackB_write(&_enc->opb,choice[1],4);
+  _enc->huff_idxs[frame_type][0][0]=(unsigned char)choice[0];
+  _enc->huff_idxs[frame_type][0][1]=(unsigned char)choice[1];
+  oc_enc_huff_group_pack(_enc,0,1,choice);
+  /*AC tokens: the costs of all four groups are summed per candidate.*/
+  memset(cost_y,0,sizeof(cost_y));
+  memset(cost_c,0,sizeof(cost_c));
+  for(hgi=1;hgi<5;hgi++){
+    oc_enc_count_tokens(_enc,GROUP_START[hgi],GROUP_END[hgi],
+     counts_y,counts_c);
+    oc_enc_count_bits(_enc,hgi,counts_y,cost_y);
+    oc_enc_count_bits(_enc,hgi,counts_c,cost_c);
+  }
+  choice[0]=oc_select_huff_idx(cost_y);
+  choice[1]=oc_select_huff_idx(cost_c);
+  /*Signal the AC choices, remember them, and write each group's tokens.*/
+  oggpackB_write(&_enc->opb,choice[0],4);
+  oggpackB_write(&_enc->opb,choice[1],4);
+  _enc->huff_idxs[frame_type][1][0]=(unsigned char)choice[0];
+  _enc->huff_idxs[frame_type][1][1]=(unsigned char)choice[1];
+  for(hgi=1;hgi<5;hgi++){
+    int group_tables[2];
+    /*Group hgi's tables start 16*hgi entries into the table list.*/
+    group_tables[0]=choice[0]+(hgi<<4);
+    group_tables[1]=choice[1]+(hgi<<4);
+    oc_enc_huff_group_pack(_enc,
+     GROUP_START[hgi],GROUP_END[hgi],group_tables);
+  }
+}
+
+/*Assembles the packet for the current frame from the analysis results and
+   marks it as ready for output.
+  A frame with no coded blocks at all becomes an empty (0 byte) packet, which
+   is how such a frame is dropped.*/
+static void oc_enc_frame_pack(oc_enc_ctx *_enc){
+  oggpackB_reset(&_enc->opb);
+  if(_enc->state.ntotal_coded_fragis>0){
+    int is_delta;
+    oc_enc_frame_header_pack(_enc);
+    is_delta=_enc->state.frame_type==OC_INTER_FRAME;
+    /*Only delta frames carry coded block flags, MB modes, and MVs.*/
+    if(is_delta){
+      oc_enc_coded_flags_pack(_enc);
+      oc_enc_mb_modes_pack(_enc);
+      oc_enc_mvs_pack(_enc);
+    }
+    oc_enc_block_qis_pack(_enc);
+    oc_enc_tokenize_finish(_enc);
+    oc_enc_residual_tokens_pack(_enc);
+  }
+  /*The packet, empty or not, can now be flushed.*/
+  _enc->packet_state=OC_PACKET_READY;
+#if defined(OC_COLLECT_METRICS)
+  oc_enc_mode_metrics_collect(_enc);
+#endif
+}
+
+
+/*Fills the encoder's table of optimizable routines with the plain C
+   versions.*/
+void oc_enc_vtable_init_c(oc_enc_ctx *_enc){
+  /*Reconstruction: these two lack the oc_enc_ prefix because they are shared
+     with the decoder.*/
+  _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
+  _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
+  /*Everything below is encoder-specific.*/
+  /*SAD metrics.*/
+  _enc->opt_vtable.frag_sad=oc_enc_frag_sad_c;
+  _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_c;
+  _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_c;
+  /*SATD metrics.*/
+  _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_c;
+  _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_c;
+  _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_c;
+  /*Residual computation and block copies.*/
+  _enc->opt_vtable.frag_sub=oc_enc_frag_sub_c;
+  _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_c;
+  _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_c;
+  /*Forward transform.*/
+  _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_c;
+}
+
+/*Initialize the macro block neighbor lists for MC analysis.
+  This assumes that the entire mb_info memory region has been initialized with
+   zeros.
+  For every valid macro block, two lists of neighboring macro block indices
+   are built: cneighbors (neighbors taken from the CDX/CDY tables) and
+   pneighbors (neighbors taken from the PDX/PDY tables).
+  Neighbors that fall outside the frame or are marked OC_MODE_INVALID are left
+   out of both lists.*/
+static void oc_enc_mb_info_init(oc_enc_ctx *_enc){
+  oc_mb_enc_info    *embs;
+  const signed char *mb_modes;
+  unsigned           nhsbs;
+  unsigned           nvsbs;
+  unsigned           nhmbs;
+  unsigned           nvmbs;
+  unsigned           sby;
+  mb_modes=_enc->state.mb_modes;
+  embs=_enc->mb_info;
+  nhsbs=_enc->state.fplanes[0].nhsbs;
+  nvsbs=_enc->state.fplanes[0].nvsbs;
+  nhmbs=_enc->state.nhmbs;
+  nvmbs=_enc->state.nvmbs;
+  /*Visit the macro blocks super block by super block, four per super
+     block.*/
+  for(sby=0;sby<nvsbs;sby++){
+    unsigned sbx;
+    for(sbx=0;sbx<nhsbs;sbx++){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++){
+        /*Because of the Hilbert curve ordering the macro blocks are
+           visited in, the available neighbors change depending on where in
+           a super block the macro block is located.
+          Only the first three vectors are used in the median calculation
+           for the optimal predictor, and so the most important should be
+           listed first.
+          Additional vectors are used, so there will always be at least 3,
+           except for in the upper-left most macro block.*/
+        /*The number of current neighbors for each macro block position.*/
+        static const unsigned char NCNEIGHBORS[4]={4,3,2,4};
+        /*The offset of each current neighbor in the X direction.*/
+        static const signed char   CDX[4][4]={
+          {-1,0,1,-1},
+          {-1,0,-1,},
+          {-1,-1},
+          {-1,0,0,1}
+        };
+        /*The offset of each current neighbor in the Y direction.*/
+        static const signed char   CDY[4][4]={
+          {0,-1,-1,-1},
+          {0,-1,-1},
+          {0,-1},
+          {0,-1,1,-1}
+        };
+        /*The offset of each previous neighbor in the X direction.*/
+        static const signed char   PDX[4]={-1,0,1,0};
+        /*The offset of each previous neighbor in the Y direction.*/
+        static const signed char   PDY[4]={0,-1,0,1};
+        unsigned mbi;
+        int      mbx;
+        int      mby;
+        unsigned nmbi;
+        int      nmbx;
+        int      nmby;
+        int      ni;
+        /*Macro block index: four consecutive entries per super block.*/
+        mbi=(sby*nhsbs+sbx<<2)+quadi;
+        if(mb_modes[mbi]==OC_MODE_INVALID)continue;
+        /*Convert the position within the super block to raster coordinates:
+           quadi 0,1,2,3 maps to (x,y) offsets (0,0),(0,1),(1,1),(1,0).*/
+        mbx=2*sbx+(quadi>>1);
+        mby=2*sby+(quadi+1>>1&1);
+        /*Fill in the neighbors with current motion vectors available.*/
+        for(ni=0;ni<NCNEIGHBORS[quadi];ni++){
+          nmbx=mbx+CDX[quadi][ni];
+          nmby=mby+CDY[quadi][ni];
+          if(nmbx<0||nmbx>=nhmbs||nmby<0||nmby>=nvmbs)continue;
+          /*Convert the raster coordinates back to a macro block index.
+            OC_MB_MAP presumably yields the position within the super block
+             for a given (y&1,x&1); confirm against its definition.
+            NOTE(review): this agrees with the mbi formula above only if
+             nhmbs==2*nhsbs; confirm where nhmbs is set.*/
+          nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1];
+          if(mb_modes[nmbi]==OC_MODE_INVALID)continue;
+          embs[mbi].cneighbors[embs[mbi].ncneighbors++]=nmbi;
+        }
+        /*Fill in the neighbors with previous motion vectors available.*/
+        for(ni=0;ni<4;ni++){
+          nmbx=mbx+PDX[ni];
+          nmby=mby+PDY[ni];
+          if(nmbx<0||nmbx>=nhmbs||nmby<0||nmby>=nvmbs)continue;
+          nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1];
+          if(mb_modes[nmbi]==OC_MODE_INVALID)continue;
+          embs[mbi].pneighbors[embs[mbi].npneighbors++]=nmbi;
+        }
+      }
+    }
+  }
+}
+
+/*Installs the Huffman codes used for DCT tokens.
+  The codes are validated by trial-packing them into the encoder's packet
+   buffer before they are copied; on failure the current codes are kept.
+  _codes: The codes to install, or NULL to fall back to TH_VP31_HUFF_CODES.
+  Return: 0 on success, TH_EFAULT if _enc is NULL, TH_EINVAL if
+           _enc->packet_state is already past OC_PACKET_SETUP_HDR, or the
+           negative value returned by oc_huff_codes_pack() if the codes are
+           rejected.*/
+static int oc_enc_set_huffman_codes(oc_enc_ctx *_enc,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){
+  int status;
+  if(_enc==NULL)return TH_EFAULT;
+  status=TH_EINVAL;
+  if(_enc->packet_state<=OC_PACKET_SETUP_HDR){
+    if(_codes==NULL)_codes=TH_VP31_HUFF_CODES;
+    /*Packing doubles as validation; the packed output itself is not used
+       here.*/
+    oggpackB_reset(&_enc->opb);
+    status=oc_huff_codes_pack(&_enc->opb,_codes);
+    if(status>=0){
+      memcpy(_enc->huff_codes,_codes,sizeof(_enc->huff_codes));
+      status=0;
+    }
+  }
+  return status;
+}
+
+/*Installs the quantization parameters used by the encoder.
+  This is only allowed before the setup header is written; when called more
+   than once, the last call wins.
+  _qinfo: The quantization parameters (described in theoraenc.h), or NULL to
+           fall back to TH_DEF_QUANT_INFO.
+  Return: 0 on success, TH_EFAULT if _enc is NULL, or TH_EINVAL if
+           _enc->packet_state is already past OC_PACKET_SETUP_HDR.*/
+static int oc_enc_set_quant_params(oc_enc_ctx *_enc,
+ const th_quant_info *_qinfo){
+  int qi;
+  if(_enc==NULL)return TH_EFAULT;
+  if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL;
+  if(_qinfo==NULL)_qinfo=&TH_DEF_QUANT_INFO;
+  /*TODO: Analyze for packing purposes instead of just doing a shallow copy.*/
+  memcpy(&_enc->qinfo,_qinfo,sizeof(_enc->qinfo));
+  /*Point every table slot at its backing storage before the tables are
+     filled in.*/
+  for(qi=0;qi<64;qi++){
+    int pli;
+    for(pli=0;pli<3;pli++){
+      int qti;
+      for(qti=0;qti<2;qti++){
+        _enc->state.dequant_tables[qi][pli][qti]=
+         _enc->state.dequant_table_data[qi][pli][qti];
+        _enc->enquant_tables[qi][pli][qti]=
+         _enc->enquant_table_data[qi][pli][qti];
+      }
+    }
+  }
+  oc_enquant_tables_init(_enc->state.dequant_tables,
+   _enc->enquant_tables,_qinfo);
+  memcpy(_enc->state.loop_filter_limits,_qinfo->loop_filter_limits,
+   sizeof(_enc->state.loop_filter_limits));
+  oc_enquant_qavg_init(_enc->log_qavg,_enc->state.dequant_tables,
+   _enc->state.info.pixel_fmt);
+  return 0;
+}
+
+/*Declared early because oc_enc_init() uses it to unwind a partial
+   initialization.*/
+static void oc_enc_clear(oc_enc_ctx *_enc);
+
+/*Initializes an encoder context from the requested stream parameters.
+  _enc:  The context to initialize.
+  _info: The requested settings; they are copied and sanitized, and the
+          original is not modified.
+  Return: 0 on success, a negative value from oc_state_init() if the shared
+           state could not be set up, or TH_EFAULT if an allocation failed (in
+           which case everything set up so far is released again).*/
+static int oc_enc_init(oc_enc_ctx *_enc,const th_info *_info){
+  th_info   info;
+  size_t    mcu_nmbs;
+  ptrdiff_t mcu_nfrags;
+  int       hdec;
+  int       vdec;
+  int       ret;
+  int       pli;
+  /*Clean up the requested settings.*/
+  memcpy(&info,_info,sizeof(info));
+  info.version_major=TH_VERSION_MAJOR;
+  info.version_minor=TH_VERSION_MINOR;
+  info.version_subminor=TH_VERSION_SUB;
+  /*Too-large qualities are clamped to 63, but negative ones select 32.*/
+  if(info.quality>63)info.quality=63;
+  if(info.quality<0)info.quality=32;
+  if(info.target_bitrate<0)info.target_bitrate=0;
+  /*Initialize the shared encoder/decoder state.*/
+  ret=oc_state_init(&_enc->state,&info,4);
+  if(ret<0)return ret;
+  /*All allocations are attempted first and checked together further down.*/
+  _enc->mb_info=_ogg_calloc(_enc->state.nmbs,sizeof(*_enc->mb_info));
+  _enc->frag_dc=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_dc));
+  _enc->coded_mbis=
+   (unsigned *)_ogg_malloc(_enc->state.nmbs*sizeof(*_enc->coded_mbis));
+  /*hdec and vdec are 1 when the corresponding bit of pixel_fmt is clear
+     (presumably meaning chroma is decimated in that direction).*/
+  hdec=!(_enc->state.info.pixel_fmt&1);
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  /*If chroma is sub-sampled in the vertical direction, we have to encode two
+     super block rows of Y' for each super block row of Cb and Cr.*/
+  _enc->mcu_nvsbs=1<<vdec;
+  /*Four macro blocks per super block.*/
+  mcu_nmbs=_enc->mcu_nvsbs*_enc->state.fplanes[0].nhsbs*(size_t)4;
+  /*Four plane-0 fragments per macro block, plus up to eight fragments for
+     the other two planes, reduced according to hdec and vdec.*/
+  mcu_nfrags=4*mcu_nmbs+(8*mcu_nmbs>>hdec+vdec);
+  _enc->mcu_skip_ssd=(unsigned *)_ogg_malloc(
+   mcu_nfrags*sizeof(*_enc->mcu_skip_ssd));
+  /*One token list and one extra-bits list per plane and zig-zag index.*/
+  for(pli=0;pli<3;pli++){
+    _enc->dct_tokens[pli]=(unsigned char **)oc_malloc_2d(64,
+     _enc->state.fplanes[pli].nfrags,sizeof(**_enc->dct_tokens));
+    _enc->extra_bits[pli]=(ogg_uint16_t **)oc_malloc_2d(64,
+     _enc->state.fplanes[pli].nfrags,sizeof(**_enc->extra_bits));
+  }
+#if defined(OC_COLLECT_METRICS)
+  _enc->frag_satd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_satd));
+  _enc->frag_ssd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_ssd));
+#endif
+#if defined(OC_X86_ASM)
+  oc_enc_vtable_init_x86(_enc);
+#else
+  oc_enc_vtable_init_c(_enc);
+#endif
+  _enc->keyframe_frequency_force=1<<_enc->state.info.keyframe_granule_shift;
+  _enc->state.qis[0]=_enc->state.info.quality;
+  _enc->state.nqis=1;
+  oc_rc_state_init(&_enc->rc,_enc);
+  oggpackB_writeinit(&_enc->opb);
+  /*Now check all of the allocations above at once.
+    oc_enc_clear() frees every one of these pointers and also clears the rate
+     control state and the packet buffer, which is why those were set up
+     before this check.*/
+  if(_enc->mb_info==NULL||_enc->frag_dc==NULL||_enc->coded_mbis==NULL||
+   _enc->mcu_skip_ssd==NULL||_enc->dct_tokens[0]==NULL||
+   _enc->dct_tokens[1]==NULL||_enc->dct_tokens[2]==NULL||
+   _enc->extra_bits[0]==NULL||_enc->extra_bits[1]==NULL||
+   _enc->extra_bits[2]==NULL
+#if defined(OC_COLLECT_METRICS)
+   ||_enc->frag_satd==NULL||_enc->frag_ssd==NULL
+#endif
+   ){
+    oc_enc_clear(_enc);
+    return TH_EFAULT;
+  }
+  oc_mode_scheme_chooser_init(&_enc->chooser);
+  /*This relies on mb_info having been zeroed by _ogg_calloc() above.*/
+  oc_enc_mb_info_init(_enc);
+  memset(_enc->huff_idxs,0,sizeof(_enc->huff_idxs));
+  /*Reset the packet-out state machine.*/
+  _enc->packet_state=OC_PACKET_INFO_HDR;
+  _enc->dup_count=0;
+  _enc->nqueued_dups=0;
+  _enc->prev_dup_count=0;
+  /*Disable VP3 compatibility by default.*/
+  _enc->vp3_compatible=0;
+  /*No INTER frames coded yet.*/
+  _enc->coded_inter_frame=0;
+  /*Start out with the VP3.1 Huffman codes and the default quantization
+     parameters.*/
+  memcpy(_enc->huff_codes,TH_VP31_HUFF_CODES,sizeof(_enc->huff_codes));
+  oc_enc_set_quant_params(_enc,NULL);
+  return 0;
+}
+
+/*Releases everything owned by an encoder context.
+  The context structure itself is not freed.*/
+static void oc_enc_clear(oc_enc_ctx *_enc){
+  int pli;
+  oc_rc_state_clear(&_enc->rc);
+#if defined(OC_COLLECT_METRICS)
+  /*Dump the metrics before the buffers further down are released.*/
+  oc_enc_mode_metrics_dump(_enc);
+#endif
+  oggpackB_writeclear(&_enc->opb);
+#if defined(OC_COLLECT_METRICS)
+  _ogg_free(_enc->frag_ssd);
+  _ogg_free(_enc->frag_satd);
+#endif
+  /*Release the per-plane token storage, last plane first.*/
+  for(pli=2;pli>=0;pli--){
+    oc_free_2d(_enc->extra_bits[pli]);
+    oc_free_2d(_enc->dct_tokens[pli]);
+  }
+  _ogg_free(_enc->mcu_skip_ssd);
+  _ogg_free(_enc->coded_mbis);
+  _ogg_free(_enc->frag_dc);
+  _ogg_free(_enc->mb_info);
+  /*The shared encoder/decoder state goes last.*/
+  oc_state_clear(&_enc->state);
+}
+
+/*Discards the frame that was just encoded: the packet is emptied and the
+   previous frame's reconstruction stands in for this one.*/
+static void oc_enc_drop_frame(th_enc_ctx *_enc){
+  /*Let motion vector analysis know the last frame was dropped.*/
+  _enc->prevframe_dropped=1;
+  /*Keep referring to the previous reconstruction instead of the new one.*/
+  _enc->state.ref_frame_idx[OC_FRAME_SELF]=
+   _enc->state.ref_frame_idx[OC_FRAME_PREV];
+  /*An empty packet is what signals the drop.*/
+  oggpackB_reset(&_enc->opb);
+}
+
+/*Encodes the current frame as a keyframe.
+  _recode: Non-zero when this frame is being encoded again after an earlier
+            attempt; it is passed on to the intra analysis.
+  The very first frame is encoded twice: the first pass is a dry run that
+   primes the feed-forward statistics (and rate control, when a target
+   bitrate is set), and the second pass produces the real packet.*/
+static void oc_enc_compress_keyframe(oc_enc_ctx *_enc,int _recode){
+  if(_enc->state.info.target_bitrate>0){
+    _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTRA_FRAME,
+     _enc->state.curframe_num>0);
+    _enc->state.nqis=1;
+  }
+  oc_enc_calc_lambda(_enc,OC_INTRA_FRAME);
+  oc_enc_analyze_intra(_enc,_recode);
+  oc_enc_frame_pack(_enc);
+  /*Only a first attempt at the first frame gets the second pass.*/
+  if(_recode||_enc->state.curframe_num!=0)return;
+  if(_enc->state.info.target_bitrate>0){
+    oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3,
+                           OC_INTRA_FRAME,_enc->state.qis[0],1,0);
+  }
+  oc_enc_compress_keyframe(_enc,1);
+}
+
+/*Encodes the current frame as a delta (INTER) frame, falling back to a
+   keyframe if mode analysis asks for one.
+  _recode: Non-zero when this frame is being encoded again after an earlier
+            attempt; it is passed on to the inter analysis.
+  The first INTER frame of the stream is encoded twice: the first pass is a
+   dry run that primes the feed-forward statistics (and rate control, when a
+   target bitrate is set).*/
+static void oc_enc_compress_frame(oc_enc_ctx *_enc,int _recode){
+  if(_enc->state.info.target_bitrate>0){
+    _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTER_FRAME,1);
+    _enc->state.nqis=1;
+  }
+  oc_enc_calc_lambda(_enc,OC_INTER_FRAME);
+  if(oc_enc_analyze_inter(_enc,_enc->rc.twopass!=2,_recode)){
+    /*Mode analysis decided this should have been a keyframe; redo it as
+       one.*/
+    oc_enc_compress_keyframe(_enc,1);
+    return;
+  }
+  oc_enc_frame_pack(_enc);
+  /*Once one INTER frame has been coded, a single pass is enough.*/
+  if(_enc->coded_inter_frame)return;
+  _enc->coded_inter_frame=1;
+  if(_enc->state.info.target_bitrate>0){
+    /*Rate control also needs to prime.*/
+    oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3,
+     OC_INTER_FRAME,_enc->state.qis[0],1,0);
+  }
+  oc_enc_compress_frame(_enc,1);
+}
+
+/*Computes the granule position of the next packet to be output and stores it
+   in _enc->state.granpos.
+  The high part (above keyframe_granule_shift) holds the biased frame number
+   of the most recent keyframe, and the low part holds the distance from that
+   keyframe, including any duplicate frames already emitted.*/
+static void oc_enc_set_granpos(oc_enc_ctx *_enc){
+  unsigned dup_offs;
+  /*The number of duplicates of the current frame emitted so far.*/
+  dup_offs=_enc->prev_dup_count-_enc->nqueued_dups;
+  if(_enc->state.frame_type!=OC_INTRA_FRAME){
+    /*Delta frame: the last keyframe goes in the high part, and the offset of
+       the current frame from it goes in the low part.*/
+    _enc->state.granpos=
+     ((_enc->state.keyframe_num+_enc->state.granpos_bias)<<
+     _enc->state.info.keyframe_granule_shift)
+     +_enc->state.curframe_num-_enc->state.keyframe_num+dup_offs;
+  }
+  else{
+    /*Keyframe: the current frame itself goes in the high part.*/
+    _enc->state.granpos=
+     ((_enc->state.curframe_num+_enc->state.granpos_bias)<<
+     _enc->state.info.keyframe_granule_shift)+dup_offs;
+  }
+}
+
+
+/*Allocates and initializes an encoder context.
+  _info: The requested stream parameters.
+  Return: The new context, or NULL if _info is NULL, memory could not be
+           allocated, or initialization failed.*/
+th_enc_ctx *th_encode_alloc(const th_info *_info){
+  oc_enc_ctx *enc;
+  if(_info==NULL)return NULL;
+  enc=_ogg_malloc(sizeof(*enc));
+  if(enc!=NULL&&oc_enc_init(enc,_info)>=0)return enc;
+  /*Either the allocation or the initialization failed.*/
+  _ogg_free(enc);
+  return NULL;
+}
+
+/*Releases an encoder context and everything it owns.
+  _enc: The context to free; NULL is accepted and ignored.*/
+void th_encode_free(th_enc_ctx *_enc){
+  if(_enc==NULL)return;
+  oc_enc_clear(_enc);
+  _ogg_free(_enc);
+}
+
+/*Encoder control function: queries or changes an encoder setting.
+  _enc:    The encoder context.
+  _req:    The TH_ENCCTL_* request code.
+  _buf:    The request's argument; its type depends on _req, and some requests
+            write a result back through it.
+  _buf_sz: The size of the object _buf points to, in bytes.
+           Every request that reads a fixed-size value checks this against
+            the size of that value before dereferencing _buf.
+  Return: 0 on success (or the value returned by the helper that implements
+           the request), TH_EFAULT if a required pointer is NULL, TH_EINVAL
+           if _buf_sz or the argument's value is not acceptable or the
+           request is not allowed in the current state, or TH_EIMPL if _req
+           is not recognized.*/
+int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){
+  switch(_req){
+    case TH_ENCCTL_SET_HUFFMAN_CODES:{
+      /*A NULL buffer (with size 0) selects the default codes.*/
+      if(_buf==NULL&&_buf_sz!=0||
+       _buf!=NULL&&_buf_sz!=sizeof(th_huff_table)*TH_NHUFFMAN_TABLES){
+        return TH_EINVAL;
+      }
+      return oc_enc_set_huffman_codes(_enc,(const th_huff_table *)_buf);
+    }break;
+    case TH_ENCCTL_SET_QUANT_PARAMS:{
+      /*A NULL buffer (with size 0) selects the default parameters.*/
+      if(_buf==NULL&&_buf_sz!=0||
+       _buf!=NULL&&_buf_sz!=sizeof(th_quant_info)){
+        return TH_EINVAL;
+      }
+      return oc_enc_set_quant_params(_enc,(th_quant_info *)_buf);
+    }break;
+    case TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE:{
+      ogg_uint32_t keyframe_frequency_force;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(keyframe_frequency_force))return TH_EINVAL;
+      keyframe_frequency_force=*(ogg_uint32_t *)_buf;
+      if(_enc->packet_state==OC_PACKET_INFO_HDR){
+        /*It's still early enough to enlarge keyframe_granule_shift.*/
+        _enc->state.info.keyframe_granule_shift=OC_CLAMPI(
+         _enc->state.info.keyframe_granule_shift,
+         OC_ILOG_32(keyframe_frequency_force-1),31);
+      }
+      /*The interval can never exceed what the granule shift can represent.*/
+      _enc->keyframe_frequency_force=OC_MINI(keyframe_frequency_force,
+       (ogg_uint32_t)1U<<_enc->state.info.keyframe_granule_shift);
+      /*Report the value actually in effect.*/
+      *(ogg_uint32_t *)_buf=_enc->keyframe_frequency_force;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_VP3_COMPATIBLE:{
+      int vp3_compatible;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(vp3_compatible))return TH_EINVAL;
+      vp3_compatible=*(int *)_buf;
+      _enc->vp3_compatible=vp3_compatible;
+      /*If the VP3.1 tables can no longer be installed, the stream cannot be
+         compatible.*/
+      if(oc_enc_set_huffman_codes(_enc,TH_VP31_HUFF_CODES)<0)vp3_compatible=0;
+      if(oc_enc_set_quant_params(_enc,&TH_VP31_QUANT_INFO)<0)vp3_compatible=0;
+      if(_enc->state.info.pixel_fmt!=TH_PF_420||
+       _enc->state.info.pic_width<_enc->state.info.frame_width||
+       _enc->state.info.pic_height<_enc->state.info.frame_height||
+      /*If we have more than 4095 super blocks, VP3's RLE coding might
+         overflow.
+        We could overcome this by ensuring we flip the coded/not-coded flags on
+         at least one super block in the frame, but we pick the simple solution
+         of just telling the user the stream will be incompatible instead.
+        It's unlikely the old VP3 codec would be able to decode streams at this
+         resolution in real time in the first place.*/
+       _enc->state.nsbs>4095){
+        vp3_compatible=0;
+      }
+      /*Tell the caller whether the stream will really be VP3 compatible.*/
+      *(int *)_buf=vp3_compatible;
+      return 0;
+    }break;
+    case TH_ENCCTL_GET_SPLEVEL_MAX:{
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(int))return TH_EINVAL;
+      *(int *)_buf=2;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_SPLEVEL:{
+      int speed;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(speed))return TH_EINVAL;
+      speed=*(int *)_buf;
+      /*The levels are validated, but none of them changes any encoder
+         setting here.*/
+      switch(speed){
+        case 0:{
+          /*_enc->MotionCompensation=1;*/
+          /*_enc->info.quick_p=0;*/
+        }break;
+        case 1:{
+          /*_enc->MotionCompensation=1;*/
+          /*_enc->info.quick_p=1;*/
+        }break;
+        case 2:{
+          /*_enc->MotionCompensation=0;*/
+          /*_enc->info.quick_p=1;*/
+        }break;
+        default:{
+          return TH_EINVAL;
+        }
+      }
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_DUP_COUNT:{
+      int dup_count;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(dup_count))return TH_EINVAL;
+      dup_count=*(int *)_buf;
+      if(dup_count>=_enc->keyframe_frequency_force)return TH_EINVAL;
+      _enc->dup_count=OC_MAXI(dup_count,0);
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_QUALITY:{
+      int qi;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      /*Make sure the buffer really holds an int before reading one, as all
+         the other fixed-size requests do.*/
+      if(_buf_sz!=sizeof(qi))return TH_EINVAL;
+      /*A fixed quality cannot be combined with a target bitrate.*/
+      if(_enc->state.info.target_bitrate>0)return TH_EINVAL;
+      qi=*(int *)_buf;
+      if(qi<0||qi>63)return TH_EINVAL;
+      _enc->state.info.quality=qi;
+      _enc->state.qis[0]=(unsigned char)qi;
+      _enc->state.nqis=1;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_BITRATE:{
+      long bitrate;
+      int  reset;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      /*Make sure the buffer really holds a long before reading one, as all
+         the other fixed-size requests do.*/
+      if(_buf_sz!=sizeof(bitrate))return TH_EINVAL;
+      bitrate=*(long *)_buf;
+      if(bitrate<=0)return TH_EINVAL;
+      /*Rate control must be initialized from scratch if no target bitrate
+         was set before; otherwise it is only resized.*/
+      reset=_enc->state.info.target_bitrate<=0;
+      _enc->state.info.target_bitrate=bitrate>INT_MAX?INT_MAX:bitrate;
+      if(reset)oc_rc_state_init(&_enc->rc,_enc);
+      else oc_enc_rc_resize(_enc);
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_RATE_FLAGS:{
+      int set;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(set))return TH_EINVAL;
+      if(_enc->state.info.target_bitrate<=0)return TH_EINVAL;
+      set=*(int *)_buf;
+      _enc->rc.drop_frames=set&TH_RATECTL_DROP_FRAMES;
+      _enc->rc.cap_overflow=set&TH_RATECTL_CAP_OVERFLOW;
+      _enc->rc.cap_underflow=set&TH_RATECTL_CAP_UNDERFLOW;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_RATE_BUFFER:{
+      int set;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(set))return TH_EINVAL;
+      if(_enc->state.info.target_bitrate<=0)return TH_EINVAL;
+      set=*(int *)_buf;
+      _enc->rc.buf_delay=set;
+      oc_enc_rc_resize(_enc);
+      /*Report the delay that is in effect after the resize.*/
+      *(int *)_buf=_enc->rc.buf_delay;
+      return 0;
+    }break;
+    case TH_ENCCTL_2PASS_OUT:{
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      /*Requires a target bitrate, and once curframe_num is non-negative the
+         encoder must already be in two-pass mode 1.*/
+      if(_enc->state.info.target_bitrate<=0||
+       _enc->state.curframe_num>=0&&_enc->rc.twopass!=1||
+       _buf_sz!=sizeof(unsigned char *)){
+        return TH_EINVAL;
+      }
+      return oc_enc_rc_2pass_out(_enc,(unsigned char **)_buf);
+    }break;
+    case TH_ENCCTL_2PASS_IN:{
+      if(_enc==NULL)return TH_EFAULT;
+      /*Requires a target bitrate, and once curframe_num is non-negative the
+         encoder must already be in two-pass mode 2.*/
+      if(_enc->state.info.target_bitrate<=0||
+       _enc->state.curframe_num>=0&&_enc->rc.twopass!=2){
+        return TH_EINVAL;
+      }
+      return oc_enc_rc_2pass_in(_enc,_buf,_buf_sz);
+    }break;
+    default:return TH_EIMPL;
+  }
+}
+
+/*Produces the next stream header packet.
+  _tc: The comment data, passed through to oc_state_flushheader().
+  _op: The packet structure, passed through to oc_state_flushheader().
+  Return: TH_EFAULT if _enc is NULL; otherwise whatever
+           oc_state_flushheader() returns.*/
+int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_tc,ogg_packet *_op){
+  return _enc==NULL?TH_EFAULT:
+   oc_state_flushheader(&_enc->state,&_enc->packet_state,&_enc->opb,
+   &_enc->qinfo,(const th_huff_table *)_enc->huff_codes,th_version_string(),
+   _tc,_op);
+}
+
+/*Copies the picture region of one image plane into an encoder-owned plane and
+   synthesizes the padding between the picture region and the frame edge.
+  _dst:        The destination plane; its width and height are taken as the
+                full frame size.
+  _src:        The source plane; only the picture region is read from it.
+  _pic_x:      The X offset of the picture region within the plane.
+  _pic_y:      The Y offset of the picture region within the plane.
+  _pic_width:  The width of the picture region.
+  _pic_height: The height of the picture region.
+  Both planes are addressed with the same (_pic_x,_pic_y) offsets, each using
+   its own stride.*/
+static void oc_img_plane_copy_pad(th_img_plane *_dst,th_img_plane *_src,
+ ogg_int32_t _pic_x,ogg_int32_t _pic_y,
+ ogg_int32_t _pic_width,ogg_int32_t _pic_height){
+  unsigned char *dst;
+  int            dstride;
+  ogg_uint32_t   frame_width;
+  ogg_uint32_t   frame_height;
+  ogg_uint32_t   y;
+  frame_width=_dst->width;
+  frame_height=_dst->height;
+  /*If we have _no_ data, just encode a dull green.*/
+  if(_pic_width==0||_pic_height==0){
+    dst=_dst->data;
+    dstride=_dst->stride;
+    /*Every sample of the plane is set to 0.*/
+    for(y=0;y<frame_height;y++){
+      memset(dst,0,frame_width*sizeof(*dst));
+      dst+=dstride;
+    }
+  }
+  /*Otherwise, copy what we do have, and add our own padding.*/
+  else{
+    unsigned char *dst_data;
+    unsigned char *src_data;
+    unsigned char *src;
+    int            sstride;
+    ogg_uint32_t   x;
+    /*Step 1: Copy the data we do have.*/
+    dstride=_dst->stride;
+    sstride=_src->stride;
+    dst_data=_dst->data;
+    src_data=_src->data;
+    dst=dst_data+_pic_y*(ptrdiff_t)dstride+_pic_x;
+    src=src_data+_pic_y*(ptrdiff_t)sstride+_pic_x;
+    for(y=0;y<_pic_height;y++){
+      memcpy(dst,src,_pic_width);
+      dst+=dstride;
+      src+=sstride;
+    }
+    /*Step 2: Perform a low-pass extension into the padding region.
+      Each new sample is (2*b+a+c+2)>>2, where a, b, and c are three adjacent
+       samples of the neighboring column or row that was filled last.
+      At the ends of that column or row the missing neighbor is replaced by b
+       itself.*/
+    /*Left side.*/
+    /*Columns are filled from the picture edge outwards, so each one filters
+       the column to its right.*/
+    for(x=_pic_x;x-->0;){
+      dst=dst_data+_pic_y*(ptrdiff_t)dstride+x;
+      for(y=0;y<_pic_height;y++){
+        /*-(cond) is all ones when cond holds and zero otherwise, so the mask
+           selects either a full row step or no step at all.*/
+        dst[0]=(dst[1]<<1)+(dst-(dstride&-(y>0)))[1]
+         +(dst+(dstride&-(y+1<_pic_height)))[1]+2>>2;
+        dst+=dstride;
+      }
+    }
+    /*Right side.*/
+    /*dst points one column to the left of the one being filled.*/
+    for(x=_pic_x+_pic_width;x<frame_width;x++){
+      dst=dst_data+_pic_y*(ptrdiff_t)dstride+x-1;
+      for(y=0;y<_pic_height;y++){
+        dst[1]=(dst[0]<<1)+(dst-(dstride&-(y>0)))[0]
+         +(dst+(dstride&-(y+1<_pic_height)))[0]+2>>2;
+        dst+=dstride;
+      }
+    }
+    /*Top.*/
+    /*These rows span the whole frame width, so they also cover the corners;
+       each one filters the row below it.*/
+    dst=dst_data+_pic_y*(ptrdiff_t)dstride;
+    for(y=_pic_y;y-->0;){
+      for(x=0;x<frame_width;x++){
+        (dst-dstride)[x]=(dst[x]<<1)+dst[x-(x>0)]
+         +dst[x+(x+1<frame_width)]+2>>2;
+      }
+      dst-=dstride;
+    }
+    /*Bottom.*/
+    /*Likewise, each row filters the row above it.*/
+    dst=dst_data+(_pic_y+_pic_height)*(ptrdiff_t)dstride;
+    for(y=_pic_y+_pic_height;y<frame_height;y++){
+      for(x=0;x<frame_width;x++){
+        dst[x]=((dst-dstride)[x]<<1)+(dst-dstride)[x-(x>0)]
+         +(dst-dstride)[x+(x+1<frame_width)]+2>>2;
+      }
+      dst+=dstride;
+    }
+  }
+}
+
+/*Submits one uncompressed frame to the encoder and compresses it.
+  _enc: The encoder context.
+  _img: The frame to encode; the dimensions of each plane must match the
+         frame size the encoder was configured with.
+  Return: 0 on success.
+          TH_EFAULT if _enc or _img is NULL.
+          TH_EINVAL if the encoder has already emitted its last packet, if
+           two-pass mode is active but rc.twopass_buffer_bytes is 0, or if the
+           plane dimensions do not match.*/
+int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _img){
+  th_ycbcr_buffer img;
+  int             cframe_width;
+  int             cframe_height;
+  int             cpic_width;
+  int             cpic_height;
+  int             cpic_x;
+  int             cpic_y;
+  int             hdec;
+  int             vdec;
+  int             pli;
+  int             refi;
+  int             drop;
+  /*Step 1: validate parameters.*/
+  if(_enc==NULL||_img==NULL)return TH_EFAULT;
+  if(_enc->packet_state==OC_PACKET_DONE)return TH_EINVAL;
+  if(_enc->rc.twopass&&_enc->rc.twopass_buffer_bytes==0)return TH_EINVAL;
+  if((ogg_uint32_t)_img[0].width!=_enc->state.info.frame_width||
+   (ogg_uint32_t)_img[0].height!=_enc->state.info.frame_height){
+    return TH_EINVAL;
+  }
+  /*The expected size of planes 1 and 2 is the frame size shifted right by
+     hdec and vdec.*/
+  hdec=!(_enc->state.info.pixel_fmt&1);
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  cframe_width=_enc->state.info.frame_width>>hdec;
+  cframe_height=_enc->state.info.frame_height>>vdec;
+  if(_img[1].width!=cframe_width||_img[2].width!=cframe_width||
+   _img[1].height!=cframe_height||_img[2].height!=cframe_height){
+    return TH_EINVAL;
+  }
+  /*Step 2: Copy the input to our internal buffer.
+    This lets us add padding, if necessary, so we don't have to worry about
+     dereferencing possibly invalid addresses, and allows us to use the same
+     strides and fragment offsets for both the input frame and the reference
+     frames.*/
+  /*Flip the input buffer upside down.*/
+  oc_ycbcr_buffer_flip(img,_img);
+  oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[OC_FRAME_IO]+0,img+0,
+   _enc->state.info.pic_x,_enc->state.info.pic_y,
+   _enc->state.info.pic_width,_enc->state.info.pic_height);
+  /*Scale the picture region for planes 1 and 2: the start is rounded down
+     and the end is rounded up.*/
+  cpic_x=_enc->state.info.pic_x>>hdec;
+  cpic_y=_enc->state.info.pic_y>>vdec;
+  cpic_width=(_enc->state.info.pic_x+_enc->state.info.pic_width+hdec>>hdec)
+   -cpic_x;
+  cpic_height=(_enc->state.info.pic_y+_enc->state.info.pic_height+vdec>>vdec)
+   -cpic_y;
+  for(pli=1;pli<3;pli++){
+    oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[OC_FRAME_IO]+pli,img+pli,
+     cpic_x,cpic_y,cpic_width,cpic_height);
+  }
+  /*Step 3: Update the buffer state.*/
+  /*A negative OC_FRAME_SELF index presumably means no frame has been encoded
+     yet, in which case there is nothing to promote.*/
+  if(_enc->state.ref_frame_idx[OC_FRAME_SELF]>=0){
+    _enc->state.ref_frame_idx[OC_FRAME_PREV]=
+     _enc->state.ref_frame_idx[OC_FRAME_SELF];
+    if(_enc->state.frame_type==OC_INTRA_FRAME){
+      /*The new frame becomes both the previous and gold reference frames.*/
+      _enc->state.keyframe_num=_enc->state.curframe_num;
+      _enc->state.ref_frame_idx[OC_FRAME_GOLD]=
+       _enc->state.ref_frame_idx[OC_FRAME_SELF];
+    }
+  }
+  /*Select a free buffer to use for the reconstructed version of this frame.*/
+  for(refi=0;refi==_enc->state.ref_frame_idx[OC_FRAME_GOLD]||
+   refi==_enc->state.ref_frame_idx[OC_FRAME_PREV];refi++);
+  _enc->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+  /*Duplicates of the previous frame count towards the frame number, too.*/
+  _enc->state.curframe_num+=_enc->prev_dup_count+1;
+  /*Step 4: Compress the frame.*/
+  /*Start with a keyframe, and don't allow the generation of invalid files that
+     overflow the keyframe_granule_shift.*/
+  if(_enc->rc.twopass_force_kf||_enc->state.curframe_num==0||
+   _enc->state.curframe_num-_enc->state.keyframe_num+_enc->dup_count>=
+   _enc->keyframe_frequency_force){
+    oc_enc_compress_keyframe(_enc,0);
+    drop=0;
+  }
+  else{
+    oc_enc_compress_frame(_enc,0);
+    drop=1;
+  }
+  oc_restore_fpu(&_enc->state);
+  /*drop currently indicates if the frame is droppable.*/
+  if(_enc->state.info.target_bitrate>0){
+    drop=oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3,
+     _enc->state.frame_type,_enc->state.qis[0],0,drop);
+  }
+  /*Without rate control, frames are never dropped.*/
+  else drop=0;
+  /*drop now indicates if the frame was dropped.*/
+  if(drop)oc_enc_drop_frame(_enc);
+  else _enc->prevframe_dropped=0;
+  _enc->packet_state=OC_PACKET_READY;
+  /*The duplicate count requested for this frame is now queued for output,
+     and the request is cleared for the next frame.*/
+  _enc->prev_dup_count=_enc->nqueued_dups=_enc->dup_count;
+  _enc->dup_count=0;
+#if defined(OC_DUMP_IMAGES)
+  oc_enc_set_granpos(_enc);
+  oc_state_dump_frame(&_enc->state,OC_FRAME_IO,"src");
+  oc_state_dump_frame(&_enc->state,OC_FRAME_SELF,"rec");
+#endif
+  return 0;
+}
+
+/*Retrieves the next packet of encoded video data, if there is one.
+  After the packet for a frame has been returned, one empty packet is returned
+   for each duplicate queued for that frame.
+  _enc:    The encoder context.
+  _last_p: Non-zero if no more frames will be submitted; the final packet
+            returned is then marked as the end of the stream.
+  _op:     Filled in with the packet.
+           The data pointer refers to the encoder's own packet buffer.
+  Return: A positive value (1 plus the number of duplicate packets still
+           queued) if a packet was returned, 0 if no packet is available, or
+           TH_EFAULT if _enc or _op is NULL or the packet buffer is
+           missing.*/
+int th_encode_packetout(th_enc_ctx *_enc,int _last_p,ogg_packet *_op){
+  if(_enc==NULL||_op==NULL)return TH_EFAULT;
+  if(_enc->packet_state==OC_PACKET_READY){
+    /*A freshly encoded frame is waiting; hand it out once.*/
+    _enc->packet_state=OC_PACKET_EMPTY;
+    if(_enc->rc.twopass!=1){
+      unsigned char *packet;
+      packet=oggpackB_get_buffer(&_enc->opb);
+      /*If there's no packet, malloc failed while writing; it's lost forever.*/
+      if(packet==NULL)return TH_EFAULT;
+      _op->packet=packet;
+      _op->bytes=oggpackB_bytes(&_enc->opb);
+    }
+    /*For the first pass in 2-pass mode, don't emit any packet data.*/
+    else{
+      _op->packet=NULL;
+      _op->bytes=0;
+    }
+  }
+  else if(_enc->packet_state==OC_PACKET_EMPTY){
+    /*The frame's packet is already out; emit a queued duplicate as an empty
+       packet, if there is one.*/
+    if(_enc->nqueued_dups>0){
+      _enc->nqueued_dups--;
+      _op->packet=NULL;
+      _op->bytes=0;
+    }
+    else{
+      /*Nothing is left to output; remember if this was the end of the
+         stream.*/
+      if(_last_p)_enc->packet_state=OC_PACKET_DONE;
+      return 0;
+    }
+  }
+  /*In any other state there is no data packet to return.*/
+  else return 0;
+  /*Only the very last packet, after all queued duplicates, ends the
+     stream.*/
+  _last_p=_last_p&&_enc->nqueued_dups<=0;
+  _op->b_o_s=0;
+  _op->e_o_s=_last_p;
+  oc_enc_set_granpos(_enc);
+  /*The offset of 3 presumably accounts for the three header packets.*/
+  _op->packetno=th_granule_frame(_enc,_enc->state.granpos)+3;
+  _op->granulepos=_enc->state.granpos;
+  if(_last_p)_enc->packet_state=OC_PACKET_DONE;
+  return 1+_enc->nqueued_dups;
+}

Copied: trunk/theora/lib/encoder_disabled.c (from rev 16442, trunk/theora/lib/enc/encoder_disabled.c)
===================================================================
--- trunk/theora/lib/encoder_disabled.c	                        (rev 0)
+++ trunk/theora/lib/encoder_disabled.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,67 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include "apiwrapper.h"
+#include "encint.h"
+/*Stub versions of the th_encode_*() entry points.
+  th_encode_alloc() always returns NULL, so no context is ever created.*/
+th_enc_ctx *th_encode_alloc(const th_info *_info){
+  return NULL;
+}
+/*No-op: th_encode_alloc() above never returns a context to free.*/
+void th_encode_free(th_enc_ctx *_enc){}
+
+/*Each remaining th_encode_*() stub ignores its arguments and returns
+   OC_DISABLED.*/
+int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){
+  return OC_DISABLED;
+}
+
+int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_tc,ogg_packet *_op){
+  return OC_DISABLED;
+}
+
+int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _img){
+  return OC_DISABLED;
+}
+
+int th_encode_packetout(th_enc_ctx *_enc,int _last_p,ogg_packet *_op){
+  return OC_DISABLED;
+}
+
+
+/*Stub versions of the theora_encode_*() entry points.
+  Each one ignores its arguments and returns OC_DISABLED.*/
+int theora_encode_init(theora_state *_te,theora_info *_ci){
+  return OC_DISABLED;
+}
+
+int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){
+  return OC_DISABLED;
+}
+
+int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){
+  return OC_DISABLED;
+}
+
+int theora_encode_header(theora_state *_te,ogg_packet *_op){
+  return OC_DISABLED;
+}
+
+int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){
+  return OC_DISABLED;
+}
+
+int theora_encode_tables(theora_state *_te,ogg_packet *_op){
+  return OC_DISABLED;
+}

Copied: trunk/theora/lib/enquant.c (from rev 16442, trunk/theora/lib/enc/enquant.c)
===================================================================
--- trunk/theora/lib/enquant.c	                        (rev 0)
+++ trunk/theora/lib/enquant.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,274 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2005                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+/*Writes the quantization parameters in _qinfo to the bit packer _opb.
+  In order, this writes the 64 loop filter limits, the 64 AC scale values, the
+   64 DC scale values, the list of unique base matrices, and then, for each of
+   the 6 (qti,pli) combinations, the quant range sizes and the index of the
+   base matrix used at each range endpoint.
+  _opb:   The buffer to write to.
+  _qinfo: The quantization parameters to write.*/
+void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){
+  const th_quant_ranges *qranges;
+  const th_quant_base   *base_mats[2*3*64];
+  int                    indices[2][3][64];
+  int                    nbase_mats;
+  int                    nbits;
+  int                    ci;
+  int                    qi;
+  int                    qri;
+  int                    qti;
+  int                    pli;
+  int                    qtj;
+  int                    plj;
+  int                    bmi;
+  int                    i;
+  i=_qinfo->loop_filter_limits[0];
+  for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]);
+  nbits=OC_ILOG_32(i);
+  oggpackB_write(_opb,nbits,3);
+  for(qi=0;qi<64;qi++){
+    oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits);
+  }
+  /*580 bits for VP3.*/
+  i=1;
+  for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->ac_scale[qi],i);
+  nbits=OC_ILOGNZ_32(i);
+  oggpackB_write(_opb,nbits-1,4);
+  for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits);
+  /*516 bits for VP3.*/
+  i=1;
+  for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->dc_scale[qi],i);
+  nbits=OC_ILOGNZ_32(i);
+  oggpackB_write(_opb,nbits-1,4);
+  for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits);
+  /*Consolidate any duplicate base matrices.
+    base_mats[] collects one pointer per distinct matrix (compared by content
+     with memcmp()), and indices[qti][pli][qri] records which entry of that
+     list each range endpoint uses.*/
+  nbase_mats=0;
+  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+    qranges=_qinfo->qi_ranges[qti]+pli;
+    for(qri=0;qri<=qranges->nranges;qri++){
+      for(bmi=0;;bmi++){
+        if(bmi>=nbase_mats){
+          base_mats[bmi]=qranges->base_matrices+qri;
+          indices[qti][pli][qri]=nbase_mats++;
+          break;
+        }
+        else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri],
+         sizeof(base_mats[bmi][0]))==0){
+          indices[qti][pli][qri]=bmi;
+          break;
+        }
+      }
+    }
+  }
+  /*Write out the list of unique base matrices.
+    1545 bits for VP3 matrices.*/
+  oggpackB_write(_opb,nbase_mats-1,9);
+  for(bmi=0;bmi<nbase_mats;bmi++){
+    for(ci=0;ci<64;ci++)oggpackB_write(_opb,base_mats[bmi][0][ci],8);
+  }
+  /*Now store quant ranges and their associated indices into the base matrix
+     list.
+    46 bits for VP3 matrices.
+    For every set after the first, we first check (when qti>0) whether it
+     matches the set for the same plane with qti-1, and then whether it matches
+     the immediately preceding set (i-1).
+    In either case only a short flag is written and the set itself is
+     skipped; otherwise a 1 bit is written, followed by the full set.*/
+  nbits=OC_ILOG_32(nbase_mats-1);
+  for(i=0;i<6;i++){
+    qti=i/3;
+    pli=i%3;
+    qranges=_qinfo->qi_ranges[qti]+pli;
+    if(i>0){
+      if(qti>0){
+        if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&&
+         memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes,
+         qranges->nranges*sizeof(qranges->sizes[0]))==0&&
+         memcmp(indices[qti][pli],indices[qti-1][pli],
+         (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
+          oggpackB_write(_opb,1,2);
+          continue;
+        }
+      }
+      qtj=(i-1)/3;
+      plj=(i-1)%3;
+      if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&&
+       memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes,
+       qranges->nranges*sizeof(qranges->sizes[0]))==0&&
+       memcmp(indices[qti][pli],indices[qtj][plj],
+       (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
+        oggpackB_write(_opb,0,1+(qti>0));
+        continue;
+      }
+      oggpackB_write(_opb,1,1);
+    }
+    oggpackB_write(_opb,indices[qti][pli][0],nbits);
+    for(qi=qri=0;qi<63;qri++){
+      oggpackB_write(_opb,qranges->sizes[qri]-1,OC_ILOG_32(62-qi));
+      qi+=qranges->sizes[qri];
+      oggpackB_write(_opb,indices[qti][pli][qri+1],nbits);
+    }
+  }
+}
+/*Fills in the multiplier m and shift l of _this for the divisor _d.
+  _d is doubled first, and l is set to one less than OC_ILOGNZ_32() of the
+   doubled value.
+  Note that 1<<16+l parses as 1<<(16+l), since '+' binds tighter than '<<'.
+  m stores t-0x10000 truncated to 16 bits.*/
+static void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){
+  ogg_uint32_t t;
+  int          l;
+  _d<<=1;
+  l=OC_ILOGNZ_32(_d)-1;
+  t=1+((ogg_uint32_t)1<<16+l)/_d;
+  _this->m=(ogg_int16_t)(t-0x10000);
+  _this->l=l;
+}
+
+/*See comments at oc_dequant_tables_init() for how the quantization tables'
+   storage should be initialized.
+  This first fills in _dequant by calling oc_dequant_tables_init(), and then
+   initializes an oc_iquant in _enquant for each of the 64 entries of every
+   dequantization table.
+  When a (pli,qti) slot for a given qi has the same _dequant pointer as an
+   earlier slot for that qi, its _enquant pointer is simply set to the earlier
+   slot's _enquant pointer instead of being filled in again.
+  _dequant: The dequantization tables, indexed by [qi][pli][qti].
+  _enquant: The quantization tables to fill in, indexed the same way.
+  _qinfo:   The quantization parameters to build the tables from.*/
+void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2],
+ oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo){
+  int qi;
+  int pli;
+  int qti;
+  /*Initialize the dequantization tables first.*/
+  oc_dequant_tables_init(_dequant,NULL,_qinfo);
+  /*Derive the quantization tables directly from the dequantization tables.*/
+  for(qi=0;qi<64;qi++)for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+    int zzi;
+    int plj;
+    int qtj;
+    int dupe;
+    dupe=0;
+    for(qtj=0;qtj<=qti;qtj++){
+      for(plj=0;plj<(qtj<qti?3:pli);plj++){
+        if(_dequant[qi][pli][qti]==_dequant[qi][plj][qtj]){
+          dupe=1;
+          break;
+        }
+      }
+      if(dupe)break;
+    }
+    if(dupe){
+      _enquant[qi][pli][qti]=_enquant[qi][plj][qtj];
+      continue;
+    }
+    /*In the original VP3.2 code, the rounding offset and the size of the
+       dead zone around 0 were controlled by a "sharpness" parameter.
+      We now R-D optimize the tokens for each block after quantization,
+       so the rounding offset should always be 1/2, and an explicit dead
+       zone is unnecessary.
+      Hence, all of that VP3.2 code is gone from here, and the remaining
+       floating point code has been implemented as equivalent integer
+       code with exact precision.*/
+    for(zzi=0;zzi<64;zzi++){
+      oc_iquant_init(_enquant[qi][pli][qti]+zzi,
+       _dequant[qi][pli][qti][zzi]);
+    }
+  }
+}
+
+
+
+/*This table gives the square root of the fraction of the squared magnitude of
+   each DCT coefficient relative to the total, scaled by 2**16, for both INTRA
+   and INTER modes.
+  These values were measured after motion-compensated prediction, before
+   quantization, over a large set of test video (from QCIF to 1080p) encoded at
+   all possible rates.
+  The DC coefficient takes into account the DPCM prediction (using the
+   quantized values from neighboring blocks, as the encoder does, but still
+   before quantization of the coefficient in the current block).
+  The results differ significantly from the expected variance (e.g., using an
+   AR(1) model of the signal with rho=0.95, as is frequently done to compute
+   the coding gain of the DCT).
+  We use them to estimate an "average" quantizer for a given quantizer matrix,
+   as this is used to parameterize a number of the rate control decisions.
+  These values are themselves probably quantizer-matrix dependent, since the
+   shape of the matrix affects the noise distribution in the reference frames,
+   but they should at least give us _some_ amount of adaptivity to different
+   matrices, as opposed to hard-coding a table of average Q values for the
+   current set.
+  The main features they capture are that a) only a few of the quantizers in
+   the upper-left corner contribute anything significant at all (though INTER
+   mode is significantly flatter) and b) the DPCM prediction of the DC
+   coefficient gives a very minor improvement in the INTRA case and a quite
+   significant one in the INTER case (over the expected variance).*/
+static const ogg_uint16_t OC_RPSD[2][64]={
+  {
+    52725,17370,10399, 6867, 5115, 3798, 2942, 2076,
+    17370, 9900, 6948, 4994, 3836, 2869, 2229, 1619,
+    10399, 6948, 5516, 4202, 3376, 2573, 2015, 1461,
+     6867, 4994, 4202, 3377, 2800, 2164, 1718, 1243,
+     5115, 3836, 3376, 2800, 2391, 1884, 1530, 1091,
+     3798, 2869, 2573, 2164, 1884, 1495, 1212,  873,
+     2942, 2229, 2015, 1718, 1530, 1212, 1001,  704,
+     2076, 1619, 1461, 1243, 1091,  873,  704,  474
+  },
+  {
+    23411,15604,13529,11601,10683, 8958, 7840, 6142,
+    15604,11901,10718, 9108, 8290, 6961, 6023, 4487,
+    13529,10718, 9961, 8527, 7945, 6689, 5742, 4333,
+    11601, 9108, 8527, 7414, 7084, 5923, 5175, 3743,
+    10683, 8290, 7945, 7084, 6771, 5754, 4793, 3504,
+     8958, 6961, 6689, 5923, 5754, 4679, 3936, 2989,
+     7840, 6023, 5742, 5175, 4793, 3936, 3522, 2558,
+     6142, 4487, 4333, 3743, 3504, 2989, 2558, 1829
+  }
+};
+
+/*The fraction of the squared magnitude of the residuals in each color channel
+   relative to the total, scaled by 2**16, for each pixel format.
+  These values were measured after motion-compensated prediction, before
+   quantization, over a large set of test video encoded at all possible rates.
+  TODO: These values are only from INTER frames; it should be re-measured for
+   INTRA frames.*/
+static const ogg_uint16_t OC_PCD[4][3]={
+  {59926, 3038, 2572},
+  {55201, 5597, 4738},
+  {55201, 5597, 4738},
+  {47682, 9669, 8185}
+};
+
+
+/*Compute an "average" quantizer for each qi level.
+  We do one for INTER and one for INTRA, since their behavior is very
+   different, but average across chroma channels.
+  The basic approach is to compute a harmonic average of the squared quantizer,
+   weighted by the expected squared magnitude of the DCT coefficients.
+  Under the (not quite true) assumption that DCT coefficients are
+   Laplacian-distributed, this preserves the product Q*lambda, where
+   lambda=sqrt(2/sigma**2) is the Laplacian distribution parameter (not to be
+   confused with the lambda used in R-D optimization throughout most of the
+   rest of the code).
+  The value Q*lambda completely determines the entropy of the coefficients.
+  _log_qavg:  Receives one result for each [qti][qi].
+  _dequant:   The dequantization tables, indexed by [qi][pli][qti].
+  _pixel_fmt: Selects which row of OC_PCD supplies the per-plane weights.*/
+void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64],
+ ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt){
+  int qi;
+  int pli;
+  int qti;
+  int ci;
+  for(qti=0;qti<2;qti++)for(qi=0;qi<64;qi++){
+    ogg_int64_t q2;
+    q2=0;
+    for(pli=0;pli<3;pli++){
+      ogg_uint32_t qp;
+      qp=0;
+      for(ci=0;ci<64;ci++){
+        unsigned rq;
+        unsigned qd;
+        qd=_dequant[qi][pli][qti][OC_IZIG_ZAG[ci]];
+        rq=(OC_RPSD[qti][ci]+(qd>>1))/qd;
+        qp+=rq*(ogg_uint32_t)rq;
+      }
+      q2+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)qp;
+    }
+    /*qavg=1.0/sqrt(q2).
+      Binary '-' binds tighter than '>>', so it is the whole difference that
+       gets shifted right by one.*/
+    _log_qavg[qti][qi]=OC_Q57(48)-oc_blog64(q2)>>1;
+  }
+}

Copied: trunk/theora/lib/enquant.h (from rev 16442, trunk/theora/lib/enc/enquant.h)
===================================================================
--- trunk/theora/lib/enquant.h	                        (rev 0)
+++ trunk/theora/lib/enquant.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,27 @@
+#if !defined(_enquant_H)
+# define _enquant_H (1)
+# include "quant.h"
+
+typedef struct oc_iquant oc_iquant;
+
+#define OC_QUANT_MAX_LOG (OC_Q57(OC_STATIC_ILOG_32(OC_QUANT_MAX)-1))
+
+/*Used to compute x/d via ((x*m>>16)+x>>l)+(x<0)
+   (i.e., one 16x16->16 mul, 2 shifts, and 2 adds).
+  This is not an approximation; for 16-bit x and d, it is exact.*/
+struct oc_iquant{
+  ogg_int16_t m;
+  ogg_int16_t l;
+};
+
+typedef oc_iquant        oc_iquant_table[64];
+
+
+
+void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo);
+void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2],
+ oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo);
+void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64],
+ ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt);
+
+#endif

Copied: trunk/theora/lib/fdct.c (from rev 16442, trunk/theora/lib/enc/fdct.c)
===================================================================
--- trunk/theora/lib/fdct.c	                        (rev 0)
+++ trunk/theora/lib/fdct.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,422 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include "encint.h"
+#include "dct.h"
+
+
+
+/*Performs a forward 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 from the orthonormal version of the
+   transform.
+  All intermediate values are held in ints; they are only narrowed back to
+   ogg_int16_t when stored in _y.
+  _y: The buffer to store the result in.
+      Data will be placed in the first 8 entries (e.g., in a row of an 8x8
+       block).
+  _x: The input coefficients.
+      Every 8th entry is used (e.g., from a column of an 8x8 block).*/
+static void oc_fdct8(ogg_int16_t _y[8],const ogg_int16_t *_x){
+  int t0;
+  int t1;
+  int t2;
+  int t3;
+  int t4;
+  int t5;
+  int t6;
+  int t7;
+  int r;
+  int s;
+  int u;
+  int v;
+  /*Stage 1:*/
+  /*0-7 butterfly.*/
+  t0=_x[0<<3]+(int)_x[7<<3];
+  t7=_x[0<<3]-(int)_x[7<<3];
+  /*1-6 butterfly.*/
+  t1=_x[1<<3]+(int)_x[6<<3];
+  t6=_x[1<<3]-(int)_x[6<<3];
+  /*2-5 butterfly.*/
+  t2=_x[2<<3]+(int)_x[5<<3];
+  t5=_x[2<<3]-(int)_x[5<<3];
+  /*3-4 butterfly.*/
+  t3=_x[3<<3]+(int)_x[4<<3];
+  t4=_x[3<<3]-(int)_x[4<<3];
+  /*Stage 2:*/
+  /*0-3 butterfly.*/
+  r=t0+t3;
+  t3=t0-t3;
+  t0=r;
+  /*1-2 butterfly.*/
+  r=t1+t2;
+  t2=t1-t2;
+  t1=r;
+  /*6-5 butterfly.*/
+  r=t6+t5;
+  t5=t6-t5;
+  t6=r;
+  /*Stages 3 and 4 are where all the approximation occurs.
+    These are chosen to be as close to an exact inverse of the approximations
+     made in the iDCT as possible, while still using mostly 16-bit arithmetic.
+    We use some 16x16->32 signed MACs, but those still commonly execute in 1
+     cycle on a 16-bit DSP.
+    For example, s=(27146*t5+0x4000>>16)+t5+(t5!=0) is an exact inverse of
+     t5=(OC_C4S4*s>>16).
+    That is, applying the latter to the output of the former will recover t5
+     exactly (over the valid input range of t5, -23171...23169).
+    We increase the rounding bias to 0xB500 in this particular case so that
+     errors inverting the subsequent butterfly are not one-sided (e.g., the
+     mean error is very close to zero).
+    The (t5!=0) term could be replaced simply by 1, but we want to send 0 to 0.
+    The fDCT of an all-zeros block will still not be zero, because of the
+     biases we added at the very beginning of the process, but it will be close
+     enough that it is guaranteed to round to zero.*/
+  /*Stage 3:*/
+  /*4-5 butterfly.*/
+  s=(27146*t5+0xB500>>16)+t5+(t5!=0)>>1;
+  r=t4+s;
+  t5=t4-s;
+  t4=r;
+  /*7-6 butterfly.*/
+  s=(27146*t6+0xB500>>16)+t6+(t6!=0)>>1;
+  r=t7+s;
+  t6=t7-s;
+  t7=r;
+  /*Stage 4:*/
+  /*0-1 butterfly.*/
+  r=(27146*t0+0x4000>>16)+t0+(t0!=0);
+  s=(27146*t1+0xB500>>16)+t1+(t1!=0);
+  u=r+s>>1;
+  v=r-u;
+  _y[0]=u;
+  _y[4]=v;
+  /*3-2 rotation by 6pi/16*/
+  u=(OC_C6S2*t2+OC_C2S6*t3+0x6CB7>>16)+(t3!=0);
+  s=(OC_C6S2*u>>16)-t2;
+  v=(s*21600+0x2800>>18)+s+(s!=0);
+  _y[2]=u;
+  _y[6]=v;
+  /*6-5 rotation by 3pi/16*/
+  u=(OC_C5S3*t6+OC_C3S5*t5+0x0E3D>>16)+(t5!=0);
+  s=t6-(OC_C5S3*u>>16);
+  v=(s*26568+0x3400>>17)+s+(s!=0);
+  _y[5]=u;
+  _y[3]=v;
+  /*7-4 rotation by 7pi/16*/
+  u=(OC_C7S1*t4+OC_C1S7*t7+0x7B1B>>16)+(t7!=0);
+  s=(OC_C7S1*u>>16)-t4;
+  v=(s*20539+0x3000>>20)+s+(s!=0);
+  _y[1]=u;
+  _y[7]=v;
+}
+/*Calls the fdct8x8 function pointer in _enc->opt_vtable on _y and _x.*/
+void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64],
+ const ogg_int16_t _x[64]){
+  (*_enc->opt_vtable.fdct8x8)(_y,_x);
+}
+
+/*Performs a forward 8x8 Type-II DCT transform.
+  The output is scaled by a factor of 4 relative to the orthonormal version
+   of the transform.
+  _y: The buffer to store the result in.
+      This may be the same as _x: the input is copied into a local work
+       buffer before anything is written to _y.
+  _x: The input coefficients.*/
+void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  const ogg_int16_t *in;
+  ogg_int16_t       *end;
+  ogg_int16_t       *out;
+  ogg_int16_t        w[64];
+  int                i;
+  /*Add two extra bits of working precision to improve accuracy; any more and
+     we could overflow.*/
+  for(i=0;i<64;i++)w[i]=_x[i]<<2;
+  /*These biases correct for some systematic error that remains in the full
+     fDCT->iDCT round trip.*/
+  w[0]+=(w[0]!=0)+1;
+  w[1]++;
+  w[8]--;
+  /*Transform columns of w into rows of _y.*/
+  for(in=w,out=_y,end=out+64;out<end;in++,out+=8)oc_fdct8(out,in);
+  /*Transform columns of _y into rows of w.*/
+  for(in=_y,out=w,end=out+64;out<end;in++,out+=8)oc_fdct8(out,in);
+  /*Round the result back to the external working precision (which is still
+     scaled by four relative to the orthogonal result).
+    TODO: We should just update the external working precision.*/
+  for(i=0;i<64;i++)_y[i]=w[i]+2>>2;
+}
+
+
+
+/*This does not seem to outperform simple LFE border padding before MC.
+  It yields higher PSNR, but much higher bitrate usage.*/
+#if 0
+typedef struct oc_extension_info oc_extension_info;
+
+
+
+/*Information needed to pad boundary blocks.
+  We multiply each row/column by an extension matrix that fills in the padding
+   values as a linear combination of the active values, so that an equivalent
+   number of coefficients are forced to zero.
+  This costs at most 16 multiplies, the same as a 1-D fDCT itself, and as
+   little as 7 multiplies.
+  We compute the extension matrices for every possible shape in advance, as
+   there are only 35.
+  The coefficients for all matrices are stored in a single array to take
+   advantage of the overlap and repetitiveness of many of the shapes.
+  A similar technique is applied to the offsets into this array.
+  This reduces the required table storage by about 48%.
+  See tools/extgen.c for details.
+  We could conceivably do the same for all 256 possible shapes.*/
+struct oc_extension_info{
+  /*The mask of the active pixels in the shape.*/
+  short                     mask;
+  /*The number of active pixels in the shape.*/
+  short                     na;
+  /*The extension matrix.
+    This is (8-na)xna*/
+  const ogg_int16_t *const *ext;
+  /*The pixel indices: na active pixels followed by 8-na padding pixels.*/
+  unsigned char             pi[8];
+  /*The coefficient indices: na unconstrained coefficients followed by 8-na
+     coefficients to be forced to zero.*/
+  unsigned char             ci[8];
+};
+
+
+/*The number of shapes we need.*/
+#define OC_NSHAPES   (35)
+
+static const ogg_int16_t OC_EXT_COEFFS[229]={
+  0x7FFF,0xE1F8,0x6903,0xAA79,0x5587,0x7FFF,0x1E08,0x7FFF,
+  0x5587,0xAA79,0x6903,0xE1F8,0x7FFF,0x0000,0x0000,0x0000,
+  0x7FFF,0x0000,0x0000,0x7FFF,0x8000,0x7FFF,0x0000,0x0000,
+  0x7FFF,0xE1F8,0x1E08,0xB0A7,0xAA1D,0x337C,0x7FFF,0x4345,
+  0x2267,0x4345,0x7FFF,0x337C,0xAA1D,0xB0A7,0x8A8C,0x4F59,
+  0x03B4,0xE2D6,0x7FFF,0x2CF3,0x7FFF,0xE2D6,0x03B4,0x4F59,
+  0x8A8C,0x1103,0x7AEF,0x5225,0xDF60,0xC288,0xDF60,0x5225,
+  0x7AEF,0x1103,0x668A,0xD6EE,0x3A16,0x0E6C,0xFA07,0x0E6C,
+  0x3A16,0xD6EE,0x668A,0x2A79,0x2402,0x980F,0x50F5,0x4882,
+  0x50F5,0x980F,0x2402,0x2A79,0xF976,0x2768,0x5F22,0x2768,
+  0xF976,0x1F91,0x76C1,0xE9AE,0x76C1,0x1F91,0x7FFF,0xD185,
+  0x0FC8,0xD185,0x7FFF,0x4F59,0x4345,0xED62,0x4345,0x4F59,
+  0xF574,0x5D99,0x2CF3,0x5D99,0xF574,0x5587,0x3505,0x30FC,
+  0xF482,0x953C,0xEAC4,0x7FFF,0x4F04,0x7FFF,0xEAC4,0x953C,
+  0xF482,0x30FC,0x4F04,0x273D,0xD8C3,0x273D,0x1E09,0x61F7,
+  0x1E09,0x273D,0xD8C3,0x273D,0x4F04,0x30FC,0xA57E,0x153C,
+  0x6AC4,0x3C7A,0x1E08,0x3C7A,0x6AC4,0x153C,0xA57E,0x7FFF,
+  0xA57E,0x5A82,0x6AC4,0x153C,0xC386,0xE1F8,0xC386,0x153C,
+  0x6AC4,0x5A82,0xD8C3,0x273D,0x7FFF,0xE1F7,0x7FFF,0x273D,
+  0xD8C3,0x4F04,0x30FC,0xD8C3,0x273D,0xD8C3,0x30FC,0x4F04,
+  0x1FC8,0x67AD,0x1853,0xE038,0x1853,0x67AD,0x1FC8,0x4546,
+  0xE038,0x1FC8,0x3ABA,0x1FC8,0xE038,0x4546,0x3505,0x5587,
+  0xF574,0xBC11,0x78F4,0x4AFB,0xE6F3,0x4E12,0x3C11,0xF8F4,
+  0x4AFB,0x3C7A,0xF88B,0x3C11,0x78F4,0xCAFB,0x7FFF,0x08CC,
+  0x070C,0x236D,0x5587,0x236D,0x070C,0xF88B,0x3C7A,0x4AFB,
+  0xF8F4,0x3C11,0x7FFF,0x153C,0xCAFB,0x153C,0x7FFF,0x1E08,
+  0xE1F8,0x7FFF,0x08CC,0x7FFF,0xCAFB,0x78F4,0x3C11,0x4E12,
+  0xE6F3,0x4AFB,0x78F4,0xBC11,0xFE3D,0x7FFF,0xFE3D,0x2F3A,
+  0x7FFF,0x2F3A,0x89BC,0x7FFF,0x89BC
+};
+
+static const ogg_int16_t *const OC_EXT_ROWS[96]={
+  OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,
+  OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   6,
+  OC_EXT_COEFFS+  27,OC_EXT_COEFFS+  38,OC_EXT_COEFFS+  43,OC_EXT_COEFFS+  32,
+  OC_EXT_COEFFS+  49,OC_EXT_COEFFS+  58,OC_EXT_COEFFS+  67,OC_EXT_COEFFS+  71,
+  OC_EXT_COEFFS+  62,OC_EXT_COEFFS+  53,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,
+  OC_EXT_COEFFS+  14,OC_EXT_COEFFS+  13,OC_EXT_COEFFS+  76,OC_EXT_COEFFS+  81,
+  OC_EXT_COEFFS+  86,OC_EXT_COEFFS+  91,OC_EXT_COEFFS+  96,OC_EXT_COEFFS+  98,
+  OC_EXT_COEFFS+  93,OC_EXT_COEFFS+  88,OC_EXT_COEFFS+  83,OC_EXT_COEFFS+  78,
+  OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,
+  OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,
+  OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+ 103,OC_EXT_COEFFS+ 108,
+  OC_EXT_COEFFS+ 126,OC_EXT_COEFFS+  16,OC_EXT_COEFFS+ 137,OC_EXT_COEFFS+ 141,
+  OC_EXT_COEFFS+  20,OC_EXT_COEFFS+ 130,OC_EXT_COEFFS+ 113,OC_EXT_COEFFS+ 116,
+  OC_EXT_COEFFS+ 146,OC_EXT_COEFFS+ 153,OC_EXT_COEFFS+ 160,OC_EXT_COEFFS+ 167,
+  OC_EXT_COEFFS+ 170,OC_EXT_COEFFS+ 163,OC_EXT_COEFFS+ 156,OC_EXT_COEFFS+ 149,
+  OC_EXT_COEFFS+ 119,OC_EXT_COEFFS+ 122,OC_EXT_COEFFS+ 174,OC_EXT_COEFFS+ 177,
+  OC_EXT_COEFFS+ 182,OC_EXT_COEFFS+ 187,OC_EXT_COEFFS+ 192,OC_EXT_COEFFS+ 197,
+  OC_EXT_COEFFS+ 202,OC_EXT_COEFFS+ 207,OC_EXT_COEFFS+ 210,OC_EXT_COEFFS+ 215,
+  OC_EXT_COEFFS+ 179,OC_EXT_COEFFS+ 189,OC_EXT_COEFFS+  24,OC_EXT_COEFFS+ 204,
+  OC_EXT_COEFFS+ 184,OC_EXT_COEFFS+ 194,OC_EXT_COEFFS+ 212,OC_EXT_COEFFS+ 199,
+  OC_EXT_COEFFS+ 217,OC_EXT_COEFFS+ 100,OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,
+  OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+ 134,
+  OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 220,OC_EXT_COEFFS+ 223,
+  OC_EXT_COEFFS+ 226,OC_EXT_COEFFS+ 227,OC_EXT_COEFFS+ 224,OC_EXT_COEFFS+ 221
+};
+
+static const oc_extension_info OC_EXTENSION_INFO[OC_NSHAPES]={
+  {0x7F,7,OC_EXT_ROWS+  0,{0,1,2,3,4,5,6,7},{0,1,2,4,5,6,7,3}},
+  {0xFE,7,OC_EXT_ROWS+  7,{1,2,3,4,5,6,7,0},{0,1,2,4,5,6,7,3}},
+  {0x3F,6,OC_EXT_ROWS+  8,{0,1,2,3,4,5,7,6},{0,1,3,4,6,7,5,2}},
+  {0xFC,6,OC_EXT_ROWS+ 10,{2,3,4,5,6,7,1,0},{0,1,3,4,6,7,5,2}},
+  {0x1F,5,OC_EXT_ROWS+ 12,{0,1,2,3,4,7,6,5},{0,2,3,5,7,6,4,1}},
+  {0xF8,5,OC_EXT_ROWS+ 15,{3,4,5,6,7,2,1,0},{0,2,3,5,7,6,4,1}},
+  {0x0F,4,OC_EXT_ROWS+ 18,{0,1,2,3,7,6,5,4},{0,2,4,6,7,5,3,1}},
+  {0xF0,4,OC_EXT_ROWS+ 18,{4,5,6,7,3,2,1,0},{0,2,4,6,7,5,3,1}},
+  {0x07,3,OC_EXT_ROWS+ 22,{0,1,2,7,6,5,4,3},{0,3,6,7,5,4,2,1}},
+  {0xE0,3,OC_EXT_ROWS+ 27,{5,6,7,4,3,2,1,0},{0,3,6,7,5,4,2,1}},
+  {0x03,2,OC_EXT_ROWS+ 32,{0,1,7,6,5,4,3,2},{0,4,7,6,5,3,2,1}},
+  {0xC0,2,OC_EXT_ROWS+ 32,{6,7,5,4,3,2,1,0},{0,4,7,6,5,3,2,1}},
+  {0x01,1,OC_EXT_ROWS+  0,{0,7,6,5,4,3,2,1},{0,7,6,5,4,3,2,1}},
+  {0x80,1,OC_EXT_ROWS+  0,{7,6,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x7E,6,OC_EXT_ROWS+ 42,{1,2,3,4,5,6,7,0},{0,1,2,5,6,7,4,3}},
+  {0x7C,5,OC_EXT_ROWS+ 44,{2,3,4,5,6,7,1,0},{0,1,4,5,7,6,3,2}},
+  {0x3E,5,OC_EXT_ROWS+ 47,{1,2,3,4,5,7,6,0},{0,1,4,5,7,6,3,2}},
+  {0x78,4,OC_EXT_ROWS+ 50,{3,4,5,6,7,2,1,0},{0,4,5,7,6,3,2,1}},
+  {0x3C,4,OC_EXT_ROWS+ 54,{2,3,4,5,7,6,1,0},{0,3,4,7,6,5,2,1}},
+  {0x1E,4,OC_EXT_ROWS+ 58,{1,2,3,4,7,6,5,0},{0,4,5,7,6,3,2,1}},
+  {0x70,3,OC_EXT_ROWS+ 62,{4,5,6,7,3,2,1,0},{0,5,7,6,4,3,2,1}},
+  {0x38,3,OC_EXT_ROWS+ 67,{3,4,5,7,6,2,1,0},{0,5,6,7,4,3,2,1}},
+  {0x1C,3,OC_EXT_ROWS+ 72,{2,3,4,7,6,5,1,0},{0,5,6,7,4,3,2,1}},
+  {0x0E,3,OC_EXT_ROWS+ 77,{1,2,3,7,6,5,4,0},{0,5,7,6,4,3,2,1}},
+  {0x60,2,OC_EXT_ROWS+ 82,{5,6,7,4,3,2,1,0},{0,2,7,6,5,4,3,1}},
+  {0x30,2,OC_EXT_ROWS+ 36,{4,5,7,6,3,2,1,0},{0,4,7,6,5,3,2,1}},
+  {0x18,2,OC_EXT_ROWS+ 90,{3,4,7,6,5,2,1,0},{0,1,7,6,5,4,3,2}},
+  {0x0C,2,OC_EXT_ROWS+ 34,{2,3,7,6,5,4,1,0},{0,4,7,6,5,3,2,1}},
+  {0x06,2,OC_EXT_ROWS+ 84,{1,2,7,6,5,4,3,0},{0,2,7,6,5,4,3,1}},
+  {0x40,1,OC_EXT_ROWS+  0,{6,7,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x20,1,OC_EXT_ROWS+  0,{5,7,6,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x10,1,OC_EXT_ROWS+  0,{4,7,6,5,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x08,1,OC_EXT_ROWS+  0,{3,7,6,5,4,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x04,1,OC_EXT_ROWS+  0,{2,7,6,5,4,3,1,0},{0,7,6,5,4,3,2,1}},
+  {0x02,1,OC_EXT_ROWS+  0,{1,7,6,5,4,3,2,0},{0,7,6,5,4,3,2,1}}
+};
+
+
+
+/*Pads a single column of a partial block and then performs a forward Type-II
+   DCT on the result.
+  The input is scaled by a factor of 4 and biased appropriately for the current
+   fDCT implementation.
+  The output is scaled by an additional factor of 2 from the orthonormal
+   version of the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in the first 8 entries (e.g., in a row of an 8x8
+       block).
+  _x: The input coefficients.
+      Every 8th entry is used (e.g., from a column of an 8x8 block).
+      When more than one pixel is active, the padding entries are overwritten
+       in place with the computed padding values.
+  _e: The extension information for the shape.*/
+static void oc_fdct8_ext(ogg_int16_t _y[8],ogg_int16_t *_x,
+ const oc_extension_info *_e){
+  const unsigned char *pi;
+  int                  na;
+  na=_e->na;
+  pi=_e->pi;
+  if(na==1){
+    int ci;
+    /*While the branch below is still correct for shapes with na==1, we can
+       perform the entire transform with just 1 multiply in this case instead
+       of 23.
+      NOTE(review): This branch reads _x[pi[0]], while the branch below
+       indexes _x[pi[api]<<3]; confirm which stride is intended before
+       re-enabling this code.*/
+    _y[0]=(ogg_int16_t)(OC_DIV2_16(OC_C4S4*(_x[pi[0]])));
+    for(ci=1;ci<8;ci++)_y[ci]=0;
+  }
+  else{
+    const ogg_int16_t *const *ext;
+    int                       zpi;
+    int                       api;
+    int                       nz;
+    /*First multiply by the extension matrix to compute the padding values.*/
+    nz=8-na;
+    ext=_e->ext;
+    for(zpi=0;zpi<nz;zpi++){
+      ogg_int32_t v;
+      v=0;
+      for(api=0;api<na;api++){
+        v+=ext[zpi][api]*(ogg_int32_t)(_x[pi[api]<<3]<<1);
+      }
+      _x[pi[na+zpi]<<3]=(ogg_int16_t)(v+0x8000>>16)+1>>1;
+    }
+    oc_fdct8(_y,_x);
+  }
+}
+
+/*Performs a forward 8x8 Type-II DCT transform on blocks which overlap the
+   border of the picture region.
+  This method ONLY works with rectangular regions.
+  The mask in _border is consumed 8 bits at a time, one group of 8 bits for
+   each value of ri.
+  If the resulting row or column shape is not listed in OC_EXTENSION_INFO,
+   this falls back to oc_enc_fdct8x8_c() on the whole block.
+  _border: A description of which pixels are inside the border.
+  _y:      The buffer to store the result in.
+           This may be the same as _x.
+  _x:      The input pixel values.
+           Pixel values outside the border will be ignored.*/
+void oc_fdct8x8_border(const oc_border_info *_border,
+ ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  ogg_int16_t             *in;
+  ogg_int16_t             *out;
+  ogg_int16_t              w[64];
+  ogg_int64_t              mask;
+  const oc_extension_info *cext;
+  const oc_extension_info *rext;
+  int                      cmask;
+  int                      rmask;
+  int                      ri;
+  int                      ci;
+  /*Identify the shapes of the non-zero rows and columns.*/
+  rmask=cmask=0;
+  mask=_border->mask;
+  for(ri=0;ri<8;ri++){
+    /*This aggregation is _only_ correct for rectangular masks.*/
+    cmask|=((mask&0xFF)!=0)<<ri;
+    rmask|=mask&0xFF;
+    mask>>=8;
+  }
+  /*Find the associated extension info for these shapes.*/
+  if(cmask==0xFF)cext=NULL;
+  else for(cext=OC_EXTENSION_INFO;cext->mask!=cmask;){
+    /*If we somehow can't find the shape, then just do an unpadded fDCT.
+      It won't be efficient, but it should still be correct.*/
+    if(++cext>=OC_EXTENSION_INFO+OC_NSHAPES){
+      oc_enc_fdct8x8_c(_y,_x);
+      return;
+    }
+  }
+  if(rmask==0xFF)rext=NULL;
+  else for(rext=OC_EXTENSION_INFO;rext->mask!=rmask;){
+    /*If we somehow can't find the shape, then just do an unpadded fDCT.
+      It won't be efficient, but it should still be correct.*/
+    if(++rext>=OC_EXTENSION_INFO+OC_NSHAPES){
+      oc_enc_fdct8x8_c(_y,_x);
+      return;
+    }
+  }
+  /*Add two extra bits of working precision to improve accuracy; any more and
+     we could overflow.*/
+  for(ci=0;ci<64;ci++)w[ci]=_x[ci]<<2;
+  /*These biases correct for some systematic error that remains in the full
+     fDCT->iDCT round trip.
+    We can safely add them before padding, since if these pixel values are
+     overwritten, we didn't care what they were anyway (and the unbiased values
+     will usually yield smaller DCT coefficient magnitudes).*/
+  w[0]+=(w[0]!=0)+1;
+  w[1]++;
+  w[8]--;
+  /*Transform the columns.
+    We can ignore zero columns without a problem.
+    NOTE(review): When cext is not NULL, a column whose rmask bit is clear is
+     skipped entirely, so its 8 entries of _y are never written here, yet the
+     row pass below still reads every entry of _y; confirm this is intended
+     before re-enabling this code.*/
+  in=w;
+  out=_y;
+  if(cext==NULL)for(ci=0;ci<8;ci++)oc_fdct8(out+(ci<<3),in+ci);
+  else for(ci=0;ci<8;ci++)if(rmask&(1<<ci))oc_fdct8_ext(out+(ci<<3),in+ci,cext);
+  /*Transform the rows.
+    We transform even rows that are supposedly zero, because rounding errors
+     may make them slightly non-zero, and this will give a more precise
+     reconstruction with very small quantizers.*/
+  in=_y;
+  out=w;
+  if(rext==NULL)for(ri=0;ri<8;ri++)oc_fdct8(out+(ri<<3),in+ri);
+  else for(ri=0;ri<8;ri++)oc_fdct8_ext(out+(ri<<3),in+ri,rext);
+  /*Round the result back to the external working precision (which is still
+     scaled by four relative to the orthogonal result).
+    TODO: We should just update the external working precision.*/
+  for(ci=0;ci<64;ci++)_y[ci]=w[ci]+2>>2;
+}
+#endif

Copied: trunk/theora/lib/fragment.c (from rev 16442, trunk/theora/lib/dec/fragment.c)
===================================================================
--- trunk/theora/lib/fragment.c	                        (rev 0)
+++ trunk/theora/lib/fragment.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,87 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+#include <string.h>
+#include "internal.h"
+
+void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src,int _ystride){
+  (*_state->opt_vtable.frag_copy)(_dst,_src,_ystride);/*Dispatch via vtable.*/
+}
+
+void oc_frag_copy_c(unsigned char *_dst,const unsigned char *_src,int _ystride){
+  /*Copy 8 rows of 8 bytes each; both pointers advance by _ystride per row.*/
+  int row;
+  for(row=0;row<8;row++){
+    memcpy(_dst,_src,8*sizeof(*_dst));
+    _dst+=_ystride,_src+=_ystride;
+  }
+}
+
+void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst,
+ int _ystride,const ogg_int16_t _residue[64]){/*Dispatch via opt_vtable.*/
+  _state->opt_vtable.frag_recon_intra(_dst,_ystride,_residue);
+}
+
+void oc_frag_recon_intra_c(unsigned char *_dst,int _ystride,
+ const ogg_int16_t _residue[64]){
+  /*Each output pixel is the residue biased by 128, clamped by OC_CLAMP255.*/
+  const ogg_int16_t *res;
+  int                y,x;
+  for(res=_residue,y=0;y<8;y++,res+=8,_dst+=_ystride){
+    for(x=0;x<8;x++)_dst[x]=OC_CLAMP255(res[x]+128);
+  }
+}
+
+void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
+  _state->opt_vtable.frag_recon_inter(_dst,_src,_ystride,_residue);/*Via vtable.*/
+}
+
+void oc_frag_recon_inter_c(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
+  /*Each output pixel is predictor plus residue, clamped by OC_CLAMP255.*/
+  const ogg_int16_t *res;
+  int                y,x;
+  for(res=_residue,y=0;y<8;y++,res+=8){
+    for(x=0;x<8;x++)_dst[x]=OC_CLAMP255(res[x]+_src[x]);
+    _dst+=_ystride,_src+=_ystride;
+  }
+}
+
+void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride,
+ const ogg_int16_t _residue[64]){/*Dispatch through the state's opt_vtable.*/
+  _state->opt_vtable.frag_recon_inter2(_dst,_src1,_src2,_ystride,_residue);
+}
+
+void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]){
+  /*The predictor is the average of the two references, rounded down; the
+     residue is added to it and the sum is clamped by OC_CLAMP255.*/
+  const ogg_int16_t *res;
+  int                y,x;
+  for(res=_residue,y=0;y<8;y++,res+=8){
+    for(x=0;x<8;x++)_dst[x]=OC_CLAMP255(res[x]+((_src1[x]+_src2[x])>>1));
+    _dst+=_ystride,_src1+=_ystride,_src2+=_ystride;
+  }
+}
+
+void oc_restore_fpu(const oc_theora_state *_state){
+  _state->opt_vtable.restore_fpu();/*Dispatch through the state's vtable.*/
+}
+
+void oc_restore_fpu_c(void){}/*The plain C version has nothing to do.*/

Copied: trunk/theora/lib/huffdec.c (from rev 16442, trunk/theora/lib/dec/huffdec.c)
===================================================================
--- trunk/theora/lib/huffdec.c	                        (rev 0)
+++ trunk/theora/lib/huffdec.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,489 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "huffdec.h"
+#include "decint.h"
+
+
+/*Local offsetof: the ANSI macro is broken on some platforms (e.g., older DECs).*/
+#define _ogg_offsetof(_type,_field)\
+ ((size_t)((char *)&((_type *)0)->_field-(char *)0))
+
+/*The number of internal tokens each spec token expands to (indexed by token).*/
+static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={
+  1,1,1,4,8,1,1,8,1,1,1,1,1,2,2,2,2,4,8,2,2,2,4,2,2,2,2,2,8,2,4,8
+};
+
+/*The map from external spec-defined tokens to internal tokens.
+  This is constructed so that any extra bits read with the original token value
+   can be masked off the least significant bits of its internal token index.
+  In addition, all of the tokens which require additional extra bits are placed
+   at the start of the list, and grouped by type.
+  OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
+   giving it index 0 may simplify comparisons on some architectures.
+  These requirements force some substantial reordering.*/
+static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
+  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
+  15,
+  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
+  16,
+  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
+  17,
+  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
+  88,
+  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
+  80,
+  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
+   1,
+  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
+   0,
+  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
+  48,
+  /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
+  14,
+  /*OC_ONE_TOKEN (0 extra bits)*/
+  56,
+  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
+  57,
+  /*OC_TWO_TOKEN (0 extra bits)*/
+  58,
+  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
+  59,
+  /*OC_DCT_VAL_CAT2 (1 extra bit); the next four entries are of this type.*/
+  60,
+  62,
+  64,
+  66,
+  /*OC_DCT_VAL_CAT3 (2 extra bits)*/
+  68,
+  /*OC_DCT_VAL_CAT4 (3 extra bits)*/
+  72,
+  /*OC_DCT_VAL_CAT5 (4 extra bits)*/
+   2,
+  /*OC_DCT_VAL_CAT6 (5 extra bits)*/
+   4,
+  /*OC_DCT_VAL_CAT7 (6 extra bits)*/
+   6,
+  /*OC_DCT_VAL_CAT8 (10 extra bits)*/
+   8,
+  /*OC_DCT_RUN_CAT1A (1 extra bit); the next five entries are of this type.*/
+  18,
+  20,
+  22,
+  24,
+  26,
+  /*OC_DCT_RUN_CAT1B (3 extra bits)*/
+  32,
+  /*OC_DCT_RUN_CAT1C (4 extra bits)*/
+  12,
+  /*OC_DCT_RUN_CAT2A (2 extra bits)*/
+  28,
+  /*OC_DCT_RUN_CAT2B (3 extra bits)*/
+  40
+};
+
+/*These three functions are really part of the bitpack.c module, but
+   they are only used here.
+  Declaring local static versions so they can be inlined saves considerable
+   function call overhead.*/
+
+static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
+  const unsigned char *ptr;
+  const unsigned char *stop;
+  oc_pb_window         window;
+  int                  available;
+  window=_b->window;
+  available=_b->bits;
+  ptr=_b->ptr;
+  stop=_b->stop;
+  /*Pull whole bytes into the window while they fit; this version doesn't set
+     eof, as we won't check for it after we've started decoding DCT tokens.*/
+  if(ptr>=stop)available=OC_LOTS_OF_BITS;
+  while(available<=OC_PB_WINDOW_SIZE-8){/*Room for another whole byte.*/
+    available+=8;
+    window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
+    if(ptr>=stop)available=OC_LOTS_OF_BITS;
+  }
+  _b->ptr=ptr;
+  if(_bits>available)window|=*ptr>>(available&7);/*Top up from the next byte.*/
+  _b->bits=available;
+  return window;/*Note: _b->window itself is not written here.*/
+}
+
+
+/*Peeks at the next _bits bits of the stream without consuming them, refilling
+   the window first if it holds fewer than that.
+  We assume 0<=_bits&&_bits<=32.
+  Return: The requested bits, taken from the top of the window.*/
+static long oc_pack_look(oc_pack_buf *_b,int _bits){
+  oc_pb_window win;
+  if(_bits==0)return 0;
+  win=_b->window;
+  if(_b->bits<_bits){
+    win=oc_pack_refill(_b,_bits);
+    _b->window=win;
+  }
+  return (long)(win>>OC_PB_WINDOW_SIZE-_bits);
+}
+
+/*Advance the bit pointer by _bits bits, dropping them from the window.*/
+static void oc_pack_adv(oc_pack_buf *_b,int _bits){
+  /*We ignore the special cases for _bits==0 and _bits==32 here, since they are
+     never actually used.
+    OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read
+     32 bits in a single go, and would require a 32 GB lookup table (assuming
+     8 byte pointers, since 4 byte pointers couldn't fit such a table).*/
+  _b->window<<=_bits;
+  _b->bits-=_bits;
+}
+
+
+/*The amount the log_2 of the size of a lookup table is allowed to exceed the
+   log_2 of the number of unique nodes it contains.
+  E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
+   wasted (each node will have an amortized cost of at most 20 bytes when using
+   4-byte pointers).
+  Larger numbers can decode tokens with fewer read operations, while smaller
+   numbers may save more space (requiring as little as 8 bytes amortized per
+   node, though there will be more nodes).
+  With a sample file:
+  32233473 read calls are required when no tree collapsing is done (100.0%).
+  19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%).
+  11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%).
+  10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%).
+  10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%).
+  Since a value of 1 gets us the vast majority of the speed-up with only a
+   small amount of wasted memory, this is what we use.*/
+#define OC_HUFF_SLUSH (1)
+
+
+/*Computes how many bytes a Huffman tree node occupies when it stands for a
+   subtree of depth _nbits.
+  _nbits: The depth of the subtree.
+          A leaf (_nbits<=0) needs only the fixed fields ahead of the table.
+          Otherwise room for 1<<_nbits child pointers is included.
+  Return: The number of bytes required to store the node.*/
+static size_t oc_huff_node_size(int _nbits){
+  size_t header;
+  header=_ogg_offsetof(oc_huff_node,nodes);
+  if(_nbits<=0)return header;
+  return header+sizeof(oc_huff_node *)*(1<<_nbits);
+}
+
+static oc_huff_node *oc_huff_node_init(char **_storage,size_t _size,int _nbits){
+  /*Carve the node off the front of *_storage, then advance it by _size bytes.*/
+  char *base=*_storage;
+  *_storage=base+_size;
+  ((oc_huff_node *)base)->nbits=(unsigned char)_nbits;
+  return (oc_huff_node *)base;
+}
+
+
+/*Determines the size in bytes of a (possibly collapsed) Huffman tree.
+  _node: The root of the tree to measure.
+         Each distinct child is counted once, even though a child of depth d
+          fills 1<<nbits-d consecutive slots of its parent's table.
+  Return: The number of bytes required to store the tree.*/
+static size_t oc_huff_tree_size(const oc_huff_node *_node){
+  size_t total;
+  int    nbits;
+  int    slot;
+  nbits=_node->nbits;
+  total=oc_huff_node_size(nbits);
+  if(nbits==0)return total;
+  for(slot=0;slot<1<<nbits;){
+    total+=oc_huff_tree_size(_node->nodes[slot]);
+    slot+=1<<nbits-_node->nodes[slot]->depth;
+  }
+  return total;
+}
+
+
+/*Unpacks a sub-tree from the given buffer.
+  _opb:      The buffer to unpack from.
+  _binodes:  The nodes to store the sub-tree in.
+  _nbinodes: The number of nodes available for the sub-tree.
+  Return: The number of nodes used on success, or a negative value on error.*/
+static int oc_huff_tree_unpack(oc_pack_buf *_opb,
+ oc_huff_node *_binodes,int _nbinodes){
+  oc_huff_node *binode;
+  long          bits;
+  int           nused;
+  if(_nbinodes<1)return TH_EBADHEADER;
+  binode=_binodes;
+  nused=0;
+  bits=oc_pack_read1(_opb);
+  if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+  /*Read an internal node:*/
+  if(!bits){
+    int ret;
+    nused++;/*Slot 0 is this node; its two sub-trees follow it in order.*/
+    binode->nbits=1;
+    binode->depth=1;
+    binode->nodes[0]=_binodes+nused;
+    ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
+    if(ret>=0){
+      nused+=ret;/*Skip past the nodes the 0-branch consumed.*/
+      binode->nodes[1]=_binodes+nused;
+      ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
+    }
+    if(ret<0)return ret;
+    nused+=ret;
+  }
+  /*Read a leaf node:*/
+  else{
+    int ntokens;
+    int token;
+    int i;
+    bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
+    if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+    /*Find out how many internal tokens we translate this external token into.*/
+    ntokens=OC_DCT_TOKEN_MAP_ENTRIES[bits];
+    if(_nbinodes<2*ntokens-1)return TH_EBADHEADER;/*ntokens-1 inner+ntokens leaf.*/
+    /*Fill in a complete binary tree pointing to the internal tokens.*/
+    for(i=1;i<ntokens;i<<=1){/*i is the number of nodes on the current level.*/
+      int j;
+      binode=_binodes+nused;
+      nused+=i;
+      for(j=0;j<i;j++){
+        binode[j].nbits=1;
+        binode[j].depth=1;
+        binode[j].nodes[0]=_binodes+nused+2*j;
+        binode[j].nodes[1]=_binodes+nused+2*j+1;
+      }
+    }
+    /*And now the leaf nodes with those tokens.*/
+    token=OC_DCT_TOKEN_MAP[bits];
+    for(i=0;i<ntokens;i++){
+      binode=_binodes+nused++;
+      binode->nbits=0;
+      binode->depth=1;
+      binode->token=token+i;
+    }
+  }
+  return nused;
+}
+
+/*Finds the depth of the shortest branch of the given sub-tree.
+  The tree must be binary.
+  _binode: The root of the given sub-tree.
+           _binode->nbits must be 0 or 1.
+  Return: The smallest depth of a leaf node in this sub-tree.
+          0 indicates this sub-tree is a leaf node.*/
+static int oc_huff_tree_mindepth(oc_huff_node *_binode){
+  int left;
+  int right;
+  if(_binode->nbits==0)return 0;
+  left=oc_huff_tree_mindepth(_binode->nodes[0]);
+  right=oc_huff_tree_mindepth(_binode->nodes[1]);
+  return 1+OC_MINI(left,right);
+}
+
+/*Counts the distinct nodes a jump table of the given depth would point at:
+   the internal nodes exactly _depth levels down, plus every leaf at that
+   level or above it.
+  The tree must be binary.
+  _binode: The root of the given sub-tree (_binode->nbits must be 0 or 1).
+  _depth:  The depth of the jump table, in levels below _binode.
+  Return: The number of distinct entries in such a table.*/
+static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
+  int total;
+  if(_depth<=0||_binode->nbits==0)return 1;
+  total=oc_huff_tree_occupancy(_binode->nodes[0],_depth-1);
+  total+=oc_huff_tree_occupancy(_binode->nodes[1],_depth-1);
+  return total;
+}
+
+/*Makes a copy of the given Huffman tree.
+  _node:    The Huffman tree to copy.
+  _storage: The next free byte of the output buffer; advanced past each node.
+  Return: The copy of the Huffman tree.*/
+static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node,
+ char **_storage){
+  oc_huff_node *copy;
+  oc_huff_node *child;
+  int           nbits;
+  int           i;
+  int           iend;
+  nbits=_node->nbits;
+  copy=oc_huff_node_init(_storage,oc_huff_node_size(nbits),nbits);
+  copy->depth=_node->depth;
+  if(nbits==0)copy->token=_node->token;
+  else for(i=0;i<1<<nbits;){
+    child=oc_huff_tree_copy(_node->nodes[i],_storage);
+    /*A child of depth d fills 1<<nbits-d consecutive slots of the table.*/
+    for(iend=i+(1<<nbits-child->depth);i<iend;i++)copy->nodes[i]=child;
+  }
+  return copy;
+}
+
+static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){
+  size_t size;/*Running total of node sizes, in bytes.*/
+  int    mindepth;
+  int    depth;
+  int    loccupancy;
+  int    occupancy;
+  if(_binode->nbits!=0&&_depth>0){/*Still above the requested level: descend.*/
+    return oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1)+
+     oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1);
+  }
+  depth=mindepth=oc_huff_tree_mindepth(_binode);
+  occupancy=1<<mindepth;
+  do{/*Deepen while that adds entries and enough of the 1<<depth slots fill.*/
+    loccupancy=occupancy;
+    occupancy=oc_huff_tree_occupancy(_binode,++depth);
+  }
+  while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
+  depth--;/*The last depth tried failed the test; step back to the one before.*/
+  size=oc_huff_node_size(depth);
+  if(depth>0){
+    size+=oc_huff_tree_collapse_size(_binode->nodes[0],depth-1);
+    size+=oc_huff_tree_collapse_size(_binode->nodes[1],depth-1);
+  }
+  return size;
+}
+
+static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
+ char **_storage);
+
+/*Fills the given nodes table with all the children in the sub-tree at the
+   given depth.
+  Nothing is freed here: the collapsed children are built through _storage,
+   and the binary nodes visited only have their depth field rewritten.
+  The sub-tree must be binary and complete up until the given depth.
+  _nodes:   The nodes table to fill.
+  _binode:  The root of the sub-tree to fill it with (nbits must be 0 or 1).
+  _storage: Passed on to oc_huff_tree_collapse() for the nodes it creates.
+  _level:   The current level in the table.
+            0 indicates that the current node should be stored, regardless of
+             whether it is a leaf node or an internal node.
+  _depth:   The depth of the nodes to fill the table with, relative to their
+             parent.*/
+static void oc_huff_node_fill(oc_huff_node **_nodes,
+ oc_huff_node *_binode,int _level,int _depth,char **_storage){
+  if(_level<=0||_binode->nbits==0){
+    int i;
+    _binode->depth=(unsigned char)(_depth-_level);/*Levels actually descended.*/
+    _nodes[0]=oc_huff_tree_collapse(_binode,_storage);
+    for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];/*Same node for unused bits.*/
+  }
+  else{
+    _level--;
+    oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth,_storage);
+    _nodes+=1<<_level;/*The 1-branch fills the second half of this range.*/
+    oc_huff_node_fill(_nodes,_binode->nodes[1],_level,_depth,_storage);
+  }
+}
+
+/*Finds the largest complete sub-tree rooted at the current node and collapses
+   it into a single node.
+  This procedure is then applied recursively to all the children of that node.
+  _binode:  The root of the sub-tree to collapse (nbits must be 0 or 1).
+  _storage: Where the new nodes are written; advanced past each one.
+  Return: The new root of the collapsed sub-tree.*/
+static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
+ char **_storage){
+  oc_huff_node *root;
+  size_t        size;
+  int           mindepth;
+  int           depth;
+  int           loccupancy;
+  int           occupancy;
+  depth=mindepth=oc_huff_tree_mindepth(_binode);
+  occupancy=1<<mindepth;
+  do{/*Deepen while that adds entries and enough of the 1<<depth slots fill.*/
+    loccupancy=occupancy;
+    occupancy=oc_huff_tree_occupancy(_binode,++depth);
+  }
+  while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
+  depth--;/*The last depth tried failed the test; step back to the one before.*/
+  if(depth<=1)return oc_huff_tree_copy(_binode,_storage);/*Copy it unchanged.*/
+  size=oc_huff_node_size(depth);
+  root=oc_huff_node_init(_storage,size,depth);
+  root->depth=_binode->depth;
+  oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage);
+  return root;
+}
+
+/*Reads TH_NHUFFMAN_TABLES Huffman trees from the packet and stores each one
+   in its collapsed form.
+  _opb:   The buffer to unpack the trees from.
+  _nodes: The table to fill with the Huffman trees.
+  Return: 0 on success, or a negative value on error.
+          Trees stored before a failure are not released here.
+          NOTE(review): confirm that the caller frees them in that case.*/
+int oc_huff_trees_unpack(oc_pack_buf *_opb,
+ oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+  oc_huff_node  binodes[511];
+  char         *buf;
+  int           nused;
+  int           ti;
+  for(ti=0;ti<TH_NHUFFMAN_TABLES;ti++){
+    /*First read the plain binary tree into the scratch array.*/
+    nused=oc_huff_tree_unpack(_opb,binodes,sizeof(binodes)/sizeof(*binodes));
+    if(nused<0)return nused;
+    /*Then allocate what oc_huff_tree_collapse_size() reports it needs...*/
+    buf=(char *)_ogg_calloc(1,oc_huff_tree_collapse_size(binodes,0));
+    if(buf==NULL)return TH_EFAULT;
+    /*...and build the collapsed tree there.*/
+    _nodes[ti]=oc_huff_tree_collapse(binodes,&buf);
+  }
+  return 0;
+}
+
+/*Makes a copy of the given set of Huffman trees.
+  _dst: The array to store the copy in.
+  _src: The array of trees to copy.
+  Return: 0 on success, or TH_EFAULT if an allocation fails; the copies made
+           up to that point are then freed, but their _dst slots are not reset.*/
+int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
+ const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]){
+  char *buf;
+  int   ti;
+  for(ti=0;ti<TH_NHUFFMAN_TABLES;ti++){
+    buf=(char *)_ogg_calloc(1,oc_huff_tree_size(_src[ti]));
+    if(buf==NULL)break;
+    _dst[ti]=oc_huff_tree_copy(_src[ti],&buf);
+  }
+  if(ti>=TH_NHUFFMAN_TABLES)return 0;
+  /*Allocation failed at index ti: release the trees copied before it.*/
+  while(ti-->0)_ogg_free(_dst[ti]);
+  return TH_EFAULT;
+}
+
+/*Frees the memory used by a set of Huffman trees (one _ogg_free() per tree).
+  _nodes: The array of trees to free; its entries are not reset afterwards.*/
+void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
+}
+
+/*Unpacks a single token using the given Huffman tree.
+  Each step peeks nbits bits, but only advances by the chosen child's depth.
+  _opb:  The buffer to unpack the token from.
+  _node: The tree to unpack the token with.
+  Return: The token value.*/
+int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node){
+  const oc_huff_node *cur;
+  for(cur=_node;cur->nbits!=0;oc_pack_adv(_opb,cur->depth)){
+    /*Index the table with the peeked bits to pick the next node.*/
+    cur=cur->nodes[oc_pack_look(_opb,cur->nbits)];
+  }
+  return cur->token;
+}

Copied: trunk/theora/lib/huffdec.h (from rev 16442, trunk/theora/lib/dec/huffdec.h)
===================================================================
--- trunk/theora/lib/huffdec.h	                        (rev 0)
+++ trunk/theora/lib/huffdec.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,92 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#if !defined(_huffdec_H)
+# define _huffdec_H (1)
+# include "huffman.h"
+# include "bitpack.h"
+
+
+
+typedef struct oc_huff_node oc_huff_node;
+
+/*A node in the Huffman tree.
+  Instead of storing every branching in the tree, subtrees can be collapsed
+   into one node, with a table of size 1<<nbits pointing directly to its
+   descendants nbits levels down.
+  This allows more than one bit to be read at a time, and avoids following all
+   the intermediate branches with next to no increased code complexity once
+   the collapsed tree has been built.
+  We do _not_ require that a subtree be complete to be collapsed, but instead
+   store duplicate pointers in the table, and record the actual depth of the
+   node below its parent.
+  This tells us the number of bits to advance the stream after reaching it.
+
+  This turns out to be equivalent to the method described in \cite{Hash95},
+   without the requirement that codewords be sorted by length.
+  If the codewords were sorted by length (so-called ``canonical-codes''), they
+   could be decoded much faster via either Lindell and Moffat's approach or
+   Hashemian's Condensed Huffman Code approach, the latter of which has an
+   extremely small memory footprint.
+  We can't use Choueka et al.'s finite state machine approach, which is
+   extremely fast, because we can't allow multiple symbols to be output at a
+   time; the codebook can and does change between symbols.
+  It also has very large memory requirements, which impairs cache coherency.
+
+  @ARTICLE{Hash95,
+    author="Reza Hashemian",
+    title="Memory Efficient and High-Speed Search {Huffman} Coding",
+    journal="{IEEE} Transactions on Communications",
+    volume=43,
+    number=10,
+    pages="2576--2581",
+    month=Oct,
+    year=1995
+  }*/
+struct oc_huff_node{
+  /*The number of bits of the code needed to descend through this node.
+    0 indicates a leaf node.
+    Otherwise there are 1<<nbits nodes in the nodes table, which can be
+     indexed by reading nbits bits from the stream.*/
+  unsigned char  nbits;
+  /*The value of a token stored in a leaf node.
+    The value in non-leaf nodes is undefined.*/
+  unsigned char  token;
+  /*The depth of the current node, relative to its parent in the collapsed
+     tree.
+    This can be less than its parent's nbits value, in which case the parent's
+     table holds 1<<(nbits-depth) pointers to this node, and the bitstream
+     should only be advanced depth bits after reaching this node.*/
+  unsigned char  depth;
+  /*The table of child nodes.
+    The ACTUAL size of this array is 1<<nbits, despite what the declaration
+     below claims.
+    The exception is that for leaf nodes the size is 0.*/
+  oc_huff_node  *nodes[2];
+};
+
+
+
+int oc_huff_trees_unpack(oc_pack_buf *_opb,
+ oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
+int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
+ const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]);
+void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
+int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node);
+
+
+#endif

Copied: trunk/theora/lib/huffenc.c (from rev 16442, trunk/theora/lib/enc/huffenc.c)
===================================================================
--- trunk/theora/lib/huffenc.c	                        (rev 0)
+++ trunk/theora/lib/huffenc.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,910 @@
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "huffenc.h"
+
+
+
+/*The default Huffman codes used for VP3.1.*/
+const th_huff_code TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]={
+  {
+    {0x002D, 6},{0x0026, 7},{0x0166, 9},{0x004E, 8},
+    {0x02CE,10},{0x059E,11},{0x027D,11},{0x0008, 5},
+    {0x04F9,12},{0x000F, 4},{0x000E, 4},{0x001B, 5},
+    {0x0006, 4},{0x0008, 4},{0x0005, 4},{0x001A, 5},
+    {0x0015, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3},
+    {0x0000, 3},{0x0009, 4},{0x0017, 5},{0x0029, 6},
+    {0x0028, 6},{0x00B2, 8},{0x04F8,12},{0x059F,11},
+    {0x009E, 9},{0x013F,10},{0x0012, 6},{0x0058, 7}
+  },
+  {
+    {0x0010, 5},{0x0047, 7},{0x01FF, 9},{0x008C, 8},
+    {0x03FC,10},{0x046A,11},{0x0469,11},{0x0022, 6},
+    {0x11A1,13},{0x000E, 4},{0x000D, 4},{0x0004, 4},
+    {0x0005, 4},{0x0009, 4},{0x0006, 4},{0x001E, 5},
+    {0x0016, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3},
+    {0x0000, 3},{0x000A, 4},{0x0017, 5},{0x007D, 7},
+    {0x007E, 7},{0x011B, 9},{0x08D1,12},{0x03FD,10},
+    {0x046B,11},{0x11A0,13},{0x007C, 7},{0x00FE, 8}
+  },
+  {
+    {0x0016, 5},{0x0020, 6},{0x0086, 8},{0x0087, 8},
+    {0x0367,10},{0x06CC,11},{0x06CB,11},{0x006E, 7},
+    {0x366D,14},{0x000F, 4},{0x000E, 4},{0x0004, 4},
+    {0x0005, 4},{0x000A, 4},{0x0006, 4},{0x001A, 5},
+    {0x0011, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3},
+    {0x0000, 3},{0x0009, 4},{0x0017, 5},{0x006F, 7},
+    {0x006D, 7},{0x0364,10},{0x0D9A,12},{0x06CA,11},
+    {0x1B37,13},{0x366C,14},{0x0042, 7},{0x00D8, 8}
+  },
+  {
+    {0x0000, 4},{0x002D, 6},{0x00F7, 8},{0x0058, 7},
+    {0x0167, 9},{0x02CB,10},{0x02CA,10},{0x000E, 6},
+    {0x1661,13},{0x0003, 3},{0x0002, 3},{0x0008, 4},
+    {0x0009, 4},{0x000D, 4},{0x0002, 4},{0x001F, 5},
+    {0x0017, 5},{0x0001, 4},{0x000C, 4},{0x000E, 4},
+    {0x000A, 4},{0x0006, 5},{0x0078, 7},{0x000F, 6},
+    {0x007A, 7},{0x0164, 9},{0x0599,11},{0x02CD,10},
+    {0x0B31,12},{0x1660,13},{0x0079, 7},{0x00F6, 8}
+  },
+  {
+    {0x0003, 4},{0x003C, 6},{0x000F, 7},{0x007A, 7},
+    {0x001D, 8},{0x0020, 9},{0x0072,10},{0x0006, 6},
+    {0x0399,13},{0x0004, 3},{0x0005, 3},{0x0005, 4},
+    {0x0006, 4},{0x000E, 4},{0x0004, 4},{0x0000, 4},
+    {0x0019, 5},{0x0002, 4},{0x000D, 4},{0x0007, 4},
+    {0x001F, 5},{0x0030, 6},{0x0011, 8},{0x0031, 6},
+    {0x0005, 6},{0x0021, 9},{0x00E7,11},{0x0038, 9},
+    {0x01CD,12},{0x0398,13},{0x007B, 7},{0x0009, 7}
+  },
+  {
+    {0x0009, 4},{0x0002, 5},{0x0074, 7},{0x0007, 6},
+    {0x00EC, 8},{0x00D1, 9},{0x01A6,10},{0x0006, 6},
+    {0x0D21,13},{0x0005, 3},{0x0006, 3},{0x0008, 4},
+    {0x0007, 4},{0x000F, 4},{0x0004, 4},{0x0000, 4},
+    {0x001C, 5},{0x0002, 4},{0x0005, 4},{0x0003, 4},
+    {0x000C, 5},{0x0035, 7},{0x01A7,10},{0x001B, 6},
+    {0x0077, 7},{0x01A5,10},{0x0349,11},{0x00D0, 9},
+    {0x0691,12},{0x0D20,13},{0x0075, 7},{0x00ED, 8}
+  },
+  {
+    {0x000A, 4},{0x000C, 5},{0x0012, 6},{0x001B, 6},
+    {0x00B7, 8},{0x016C, 9},{0x0099, 9},{0x005A, 7},
+    {0x16D8,13},{0x0007, 3},{0x0006, 3},{0x0009, 4},
+    {0x0008, 4},{0x0000, 3},{0x0005, 4},{0x0017, 5},
+    {0x000E, 5},{0x0002, 4},{0x0003, 4},{0x000F, 5},
+    {0x001A, 6},{0x004D, 8},{0x2DB3,14},{0x002C, 6},
+    {0x0011, 6},{0x02DA,10},{0x05B7,11},{0x0098, 9},
+    {0x0B6D,12},{0x2DB2,14},{0x0010, 6},{0x0027, 7}
+  },
+  {
+    {0x000D, 4},{0x000F, 5},{0x001D, 6},{0x0008, 5},
+    {0x0051, 7},{0x0056, 8},{0x00AF, 9},{0x002A, 7},
+    {0x148A,13},{0x0007, 3},{0x0000, 2},{0x0008, 4},
+    {0x0009, 4},{0x000C, 4},{0x0006, 4},{0x0017, 5},
+    {0x000B, 5},{0x0016, 5},{0x0015, 5},{0x0009, 5},
+    {0x0050, 7},{0x00AE, 9},{0x2917,14},{0x001C, 6},
+    {0x0014, 6},{0x0290,10},{0x0523,11},{0x0149, 9},
+    {0x0A44,12},{0x2916,14},{0x0053, 7},{0x00A5, 8}
+  },
+  {
+    {0x0001, 4},{0x001D, 6},{0x00F5, 8},{0x00F4, 8},
+    {0x024D,10},{0x0499,11},{0x0498,11},{0x0001, 5},
+    {0x0021, 6},{0x0006, 3},{0x0005, 3},{0x0006, 4},
+    {0x0005, 4},{0x0002, 4},{0x0007, 5},{0x0025, 6},
+    {0x007B, 7},{0x001C, 6},{0x0020, 6},{0x000D, 6},
+    {0x0048, 7},{0x0092, 8},{0x0127, 9},{0x000E, 4},
+    {0x0004, 4},{0x0011, 5},{0x000C, 6},{0x003C, 6},
+    {0x000F, 5},{0x0000, 5},{0x001F, 5},{0x0013, 5}
+  },
+  {
+    {0x0005, 4},{0x003C, 6},{0x0040, 7},{0x000D, 7},
+    {0x0031, 9},{0x0061,10},{0x0060,10},{0x0002, 5},
+    {0x00F5, 8},{0x0006, 3},{0x0005, 3},{0x0007, 4},
+    {0x0006, 4},{0x0002, 4},{0x0009, 5},{0x0025, 6},
+    {0x0007, 6},{0x0021, 6},{0x0024, 6},{0x0010, 6},
+    {0x0041, 7},{0x00F4, 8},{0x0019, 8},{0x000E, 4},
+    {0x0003, 4},{0x0011, 5},{0x0011, 6},{0x003F, 6},
+    {0x003E, 6},{0x007B, 7},{0x0000, 4},{0x0013, 5}
+  },
+  {
+    {0x000A, 4},{0x0007, 5},{0x0001, 6},{0x0009, 6},
+    {0x0131, 9},{0x0261,10},{0x0260,10},{0x0015, 6},
+    {0x0001, 7},{0x0007, 3},{0x0006, 3},{0x0008, 4},
+    {0x0007, 4},{0x0006, 4},{0x0012, 5},{0x002F, 6},
+    {0x0014, 6},{0x0027, 6},{0x002D, 6},{0x0016, 6},
+    {0x004D, 7},{0x0099, 8},{0x0000, 7},{0x0004, 4},
+    {0x0001, 4},{0x0005, 5},{0x0017, 6},{0x002E, 6},
+    {0x002C, 6},{0x0008, 6},{0x0006, 5},{0x0001, 5}
+  },
+  {
+    {0x0000, 3},{0x000E, 5},{0x0017, 6},{0x002A, 6},
+    {0x0010, 7},{0x00F9,10},{0x00F8,10},{0x001E, 7},
+    {0x003F, 8},{0x0007, 3},{0x0006, 3},{0x0009, 4},
+    {0x0008, 4},{0x0006, 4},{0x000F, 5},{0x0005, 5},
+    {0x0016, 6},{0x0029, 6},{0x002B, 6},{0x0015, 6},
+    {0x0050, 7},{0x0011, 7},{0x007D, 9},{0x0004, 4},
+    {0x0017, 5},{0x0006, 5},{0x0014, 6},{0x002C, 6},
+    {0x002D, 6},{0x000E, 6},{0x0009, 6},{0x0051, 7}
+  },
+  {
+    {0x0002, 3},{0x0018, 5},{0x002F, 6},{0x000D, 5},
+    {0x0053, 7},{0x0295,10},{0x0294,10},{0x00A4, 8},
+    {0x007C, 8},{0x0000, 2},{0x0007, 3},{0x0009, 4},
+    {0x0008, 4},{0x001B, 5},{0x000C, 5},{0x0028, 6},
+    {0x006A, 7},{0x001E, 6},{0x001D, 6},{0x0069, 7},
+    {0x00D7, 8},{0x007D, 8},{0x014B, 9},{0x0019, 5},
+    {0x0016, 5},{0x002E, 6},{0x001C, 6},{0x002B, 6},
+    {0x002A, 6},{0x0068, 7},{0x003F, 7},{0x00D6, 8}
+  },
+  {
+    {0x0002, 3},{0x001B, 5},{0x000C, 5},{0x0018, 5},
+    {0x0029, 6},{0x007F, 8},{0x02F0,10},{0x0198, 9},
+    {0x0179, 9},{0x0000, 2},{0x0007, 3},{0x0009, 4},
+    {0x0008, 4},{0x001A, 5},{0x000D, 5},{0x002A, 6},
+    {0x0064, 7},{0x001E, 6},{0x0067, 7},{0x005F, 7},
+    {0x00CD, 8},{0x007E, 8},{0x02F1,10},{0x0016, 5},
+    {0x000E, 5},{0x002E, 6},{0x0065, 7},{0x002B, 6},
+    {0x0028, 6},{0x003E, 7},{0x00BD, 8},{0x0199, 9}
+  },
+  {
+    {0x0002, 3},{0x0007, 4},{0x0016, 5},{0x0006, 4},
+    {0x0036, 6},{0x005C, 7},{0x015D, 9},{0x015C, 9},
+    {0x02BF,10},{0x0000, 2},{0x0007, 3},{0x0009, 4},
+    {0x0008, 4},{0x0018, 5},{0x0034, 6},{0x002A, 6},
+    {0x005E, 7},{0x006A, 7},{0x0064, 7},{0x005D, 7},
+    {0x00CB, 8},{0x00AD, 8},{0x02BE,10},{0x0014, 5},
+    {0x0033, 6},{0x006E, 7},{0x005F, 7},{0x006F, 7},
+    {0x006B, 7},{0x00CA, 8},{0x00AC, 8},{0x015E, 9}
+  },
+  {
+    {0x000F, 4},{0x001D, 5},{0x0018, 5},{0x000B, 4},
+    {0x0019, 5},{0x0029, 6},{0x00D6, 8},{0x0551,11},
+    {0x0AA1,12},{0x0001, 2},{0x0000, 2},{0x0009, 4},
+    {0x0008, 4},{0x001B, 5},{0x0038, 6},{0x0028, 6},
+    {0x0057, 7},{0x006A, 7},{0x0068, 7},{0x0056, 7},
+    {0x00E5, 8},{0x0155, 9},{0x0AA0,12},{0x0073, 7},
+    {0x0069, 7},{0x00D7, 8},{0x00AB, 8},{0x00E4, 8},
+    {0x00A9, 8},{0x0151, 9},{0x0150, 9},{0x02A9,10}
+  },
+  {
+    {0x0008, 5},{0x0025, 7},{0x017A, 9},{0x02F7,10},
+    {0x0BDB,12},{0x17B4,13},{0x2F6B,14},{0x001D, 5},
+    {0x2F6A,14},{0x0008, 4},{0x0007, 4},{0x0001, 4},
+    {0x0002, 4},{0x000A, 4},{0x0006, 4},{0x0000, 4},
+    {0x001C, 5},{0x0009, 4},{0x000D, 4},{0x000F, 4},
+    {0x000C, 4},{0x0003, 4},{0x000A, 5},{0x0016, 5},
+    {0x0013, 6},{0x005D, 7},{0x0024, 7},{0x00BC, 8},
+    {0x005C, 7},{0x05EC,11},{0x000B, 5},{0x005F, 7}
+  },
+  {
+    {0x000F, 5},{0x0010, 6},{0x004B, 8},{0x00C6, 8},
+    {0x031D,10},{0x0C71,12},{0x0C70,12},{0x0001, 4},
+    {0x0C73,12},{0x0008, 4},{0x0009, 4},{0x0002, 4},
+    {0x0003, 4},{0x000B, 4},{0x0006, 4},{0x0000, 4},
+    {0x001C, 5},{0x0005, 4},{0x000D, 4},{0x000F, 4},
+    {0x000A, 4},{0x0019, 5},{0x0013, 6},{0x001D, 5},
+    {0x0030, 6},{0x0062, 7},{0x0024, 7},{0x004A, 8},
+    {0x018F, 9},{0x0C72,12},{0x000E, 5},{0x0011, 6}
+  },
+  {
+    {0x001B, 5},{0x0003, 6},{0x008D, 8},{0x0040, 7},
+    {0x0239,10},{0x0471,11},{0x08E0,12},{0x0003, 4},
+    {0x11C3,13},{0x000A, 4},{0x0009, 4},{0x0004, 4},
+    {0x0005, 4},{0x000E, 4},{0x0007, 4},{0x0001, 4},
+    {0x001E, 5},{0x0006, 4},{0x000C, 4},{0x000B, 4},
+    {0x0002, 4},{0x0000, 5},{0x0041, 7},{0x001F, 5},
+    {0x0022, 6},{0x0002, 6},{0x008F, 8},{0x008C, 8},
+    {0x011D, 9},{0x11C2,13},{0x001A, 5},{0x0021, 6}
+  },
+  {
+    {0x001F, 5},{0x0003, 6},{0x0003, 7},{0x0043, 7},
+    {0x000B, 9},{0x0015,10},{0x0051,12},{0x0003, 4},
+    {0x0050,12},{0x000D, 4},{0x000C, 4},{0x0004, 4},
+    {0x0006, 4},{0x000E, 4},{0x000A, 4},{0x0001, 4},
+    {0x001E, 5},{0x0005, 4},{0x0009, 4},{0x0007, 4},
+    {0x0011, 5},{0x0002, 6},{0x0004, 8},{0x0002, 4},
+    {0x002D, 6},{0x0020, 6},{0x0042, 7},{0x0001, 7},
+    {0x0000, 7},{0x0029,11},{0x0017, 5},{0x002C, 6}
+  },
+  {
+    {0x0003, 4},{0x001F, 6},{0x003A, 7},{0x005D, 7},
+    {0x0173, 9},{0x02E4,10},{0x172D,13},{0x0004, 4},
+    {0x172C,13},{0x000F, 4},{0x000E, 4},{0x0009, 4},
+    {0x0008, 4},{0x000C, 4},{0x000A, 4},{0x0001, 4},
+    {0x0016, 5},{0x0002, 4},{0x0005, 4},{0x001A, 5},
+    {0x002F, 6},{0x0038, 7},{0x05CA,11},{0x0006, 4},
+    {0x0037, 6},{0x001E, 6},{0x003B, 7},{0x0039, 7},
+    {0x00B8, 8},{0x0B97,12},{0x0000, 4},{0x0036, 6}
+  },
+  {
+    {0x0006, 4},{0x0037, 6},{0x005D, 7},{0x000C, 6},
+    {0x00B9, 8},{0x02E3,10},{0x05C4,11},{0x0004, 4},
+    {0x1715,13},{0x0000, 3},{0x000F, 4},{0x0008, 4},
+    {0x0007, 4},{0x000C, 4},{0x0009, 4},{0x001D, 5},
+    {0x0016, 5},{0x001C, 5},{0x001A, 5},{0x000B, 5},
+    {0x005E, 7},{0x0170, 9},{0x1714,13},{0x000A, 4},
+    {0x000A, 5},{0x0036, 6},{0x005F, 7},{0x001B, 7},
+    {0x001A, 7},{0x0B8B,12},{0x0002, 4},{0x0007, 5}
+  },
+  {
+    {0x000C, 4},{0x000B, 5},{0x0079, 7},{0x0022, 6},
+    {0x00F0, 8},{0x0119, 9},{0x0230,10},{0x001D, 5},
+    {0x08C4,12},{0x0001, 3},{0x0000, 3},{0x000A, 4},
+    {0x0009, 4},{0x000B, 4},{0x0007, 4},{0x001C, 5},
+    {0x003D, 6},{0x000D, 5},{0x0008, 5},{0x0015, 6},
+    {0x008D, 8},{0x118B,13},{0x118A,13},{0x000D, 4},
+    {0x0010, 5},{0x0009, 5},{0x0014, 6},{0x0047, 7},
+    {0x00F1, 8},{0x0463,11},{0x001F, 5},{0x000C, 5}
+  },
+  {
+    {0x0000, 3},{0x001A, 5},{0x0033, 6},{0x000C, 5},
+    {0x0046, 7},{0x01E3, 9},{0x03C5,10},{0x0017, 5},
+    {0x1E21,13},{0x0002, 3},{0x0001, 3},{0x0009, 4},
+    {0x000A, 4},{0x0007, 4},{0x001B, 5},{0x003D, 6},
+    {0x001B, 6},{0x0022, 6},{0x0079, 7},{0x00F0, 8},
+    {0x1E20,13},{0x1E23,13},{0x1E22,13},{0x000E, 4},
+    {0x0016, 5},{0x0018, 5},{0x0032, 6},{0x001A, 6},
+    {0x0047, 7},{0x0789,11},{0x001F, 5},{0x0010, 5}
+  },
+  {
+    {0x001D, 5},{0x0061, 7},{0x004E, 8},{0x009E, 9},
+    {0x027C,11},{0x09F5,13},{0x09F4,13},{0x0003, 4},
+    {0x0060, 7},{0x0000, 3},{0x000F, 4},{0x000B, 4},
+    {0x000A, 4},{0x0009, 4},{0x0005, 4},{0x000D, 5},
+    {0x0031, 6},{0x0008, 5},{0x0038, 6},{0x0012, 6},
+    {0x0026, 7},{0x013F,10},{0x04FB,12},{0x000D, 4},
+    {0x0002, 4},{0x000C, 5},{0x0039, 6},{0x001C, 6},
+    {0x000F, 5},{0x001D, 6},{0x0008, 4},{0x0019, 5}
+  },
+  {
+    {0x0007, 4},{0x0019, 6},{0x00AB, 8},{0x00AA, 8},
+    {0x0119,10},{0x0461,12},{0x0460,12},{0x001B, 5},
+    {0x0047, 8},{0x0001, 3},{0x0000, 3},{0x000C, 4},
+    {0x000B, 4},{0x0009, 4},{0x0005, 4},{0x000D, 5},
+    {0x0035, 6},{0x003D, 6},{0x003C, 6},{0x0018, 6},
+    {0x0022, 7},{0x008D, 9},{0x0231,11},{0x000E, 4},
+    {0x001F, 5},{0x0009, 5},{0x002B, 6},{0x0010, 6},
+    {0x0034, 6},{0x0054, 7},{0x0008, 4},{0x0014, 5}
+  },
+  {
+    {0x000C, 4},{0x0005, 5},{0x0008, 6},{0x005B, 7},
+    {0x004D, 9},{0x0131,11},{0x0261,12},{0x001A, 5},
+    {0x0012, 7},{0x0000, 3},{0x000F, 4},{0x000A, 4},
+    {0x0009, 4},{0x0006, 4},{0x001B, 5},{0x0006, 5},
+    {0x001C, 6},{0x002C, 6},{0x0015, 6},{0x005A, 7},
+    {0x0027, 8},{0x0099,10},{0x0260,12},{0x000E, 4},
+    {0x0004, 4},{0x000F, 5},{0x0007, 5},{0x001D, 6},
+    {0x000B, 5},{0x0014, 6},{0x0008, 4},{0x0017, 5}
+  },
+  {
+    {0x000F, 4},{0x0013, 5},{0x0075, 7},{0x0024, 6},
+    {0x0095, 8},{0x0251,10},{0x04A0,11},{0x0010, 5},
+    {0x00C8, 8},{0x0002, 3},{0x0001, 3},{0x0001, 4},
+    {0x0000, 4},{0x001A, 5},{0x0011, 5},{0x002C, 6},
+    {0x0065, 7},{0x0074, 7},{0x004B, 7},{0x00C9, 8},
+    {0x0129, 9},{0x0943,12},{0x0942,12},{0x0003, 3},
+    {0x000A, 4},{0x001C, 5},{0x0018, 5},{0x0033, 6},
+    {0x0017, 5},{0x002D, 6},{0x001B, 5},{0x003B, 6}
+  },
+  {
+    {0x0003, 3},{0x001A, 5},{0x002D, 6},{0x0038, 6},
+    {0x0028, 7},{0x0395,10},{0x0E51,12},{0x0037, 6},
+    {0x00E4, 8},{0x0001, 3},{0x0000, 3},{0x001F, 5},
+    {0x001E, 5},{0x0017, 5},{0x003A, 6},{0x0073, 7},
+    {0x002A, 7},{0x002B, 7},{0x0029, 7},{0x01CB, 9},
+    {0x0729,11},{0x1CA1,13},{0x1CA0,13},{0x0004, 3},
+    {0x000A, 4},{0x0004, 4},{0x0018, 5},{0x0036, 6},
+    {0x000B, 5},{0x002C, 6},{0x0019, 5},{0x003B, 6}
+  },
+  {
+    {0x0004, 3},{0x0004, 4},{0x003F, 6},{0x0017, 5},
+    {0x0075, 7},{0x01F5, 9},{0x07D1,11},{0x0017, 6},
+    {0x01F6, 9},{0x0001, 3},{0x0000, 3},{0x001B, 5},
+    {0x001A, 5},{0x000A, 5},{0x0032, 6},{0x0074, 7},
+    {0x00F8, 8},{0x00F9, 8},{0x01F7, 9},{0x03E9,10},
+    {0x0FA0,12},{0x1F43,13},{0x1F42,13},{0x0003, 3},
+    {0x000A, 4},{0x001E, 5},{0x001C, 5},{0x003B, 6},
+    {0x0018, 5},{0x0016, 6},{0x0016, 5},{0x0033, 6}
+  },
+  {
+    {0x0004, 3},{0x0007, 4},{0x0018, 5},{0x001E, 5},
+    {0x0036, 6},{0x0031, 7},{0x0177, 9},{0x0077, 7},
+    {0x0176, 9},{0x0001, 3},{0x0000, 3},{0x001A, 5},
+    {0x0019, 5},{0x003A, 6},{0x0019, 6},{0x005C, 7},
+    {0x00BA, 8},{0x0061, 8},{0x00C1, 9},{0x0180,10},
+    {0x0302,11},{0x0607,12},{0x0606,12},{0x0002, 3},
+    {0x000A, 4},{0x001F, 5},{0x001C, 5},{0x0037, 6},
+    {0x0016, 5},{0x0076, 7},{0x000D, 5},{0x002F, 6}
+  },
+  {
+    {0x0000, 3},{0x000A, 4},{0x001A, 5},{0x000C, 4},
+    {0x001D, 5},{0x0039, 6},{0x0078, 7},{0x005E, 7},
+    {0x0393,11},{0x0002, 3},{0x0001, 3},{0x0016, 5},
+    {0x000F, 5},{0x002E, 6},{0x005F, 7},{0x0073, 8},
+    {0x00E5, 9},{0x01C8,10},{0x0E4A,13},{0x1C97,14},
+    {0x1C96,14},{0x0E49,13},{0x0E48,13},{0x0004, 3},
+    {0x0006, 4},{0x001F, 5},{0x001B, 5},{0x001D, 6},
+    {0x0038, 6},{0x0038, 7},{0x003D, 6},{0x0079, 7}
+  },
+  {
+    {0x000B, 5},{0x002B, 7},{0x0054, 8},{0x01B7, 9},
+    {0x06D9,11},{0x0DB1,12},{0x0DB0,12},{0x0002, 4},
+    {0x00AB, 9},{0x0009, 4},{0x000A, 4},{0x0007, 4},
+    {0x0008, 4},{0x000F, 4},{0x000C, 4},{0x0003, 4},
+    {0x001D, 5},{0x0004, 4},{0x000B, 4},{0x0006, 4},
+    {0x001A, 5},{0x0003, 6},{0x00AA, 9},{0x0001, 4},
+    {0x0000, 5},{0x0014, 6},{0x006C, 7},{0x00DA, 8},
+    {0x0002, 6},{0x036D,10},{0x001C, 5},{0x0037, 6}
+  },
+  {
+    {0x001D, 5},{0x0004, 6},{0x00B6, 8},{0x006A, 8},
+    {0x05B9,11},{0x16E1,13},{0x16E0,13},{0x0007, 4},
+    {0x016F, 9},{0x000C, 4},{0x000D, 4},{0x0009, 4},
+    {0x0008, 4},{0x000F, 4},{0x000A, 4},{0x0003, 4},
+    {0x0017, 5},{0x0002, 4},{0x0004, 4},{0x001C, 5},
+    {0x002C, 6},{0x006B, 8},{0x0B71,12},{0x0005, 4},
+    {0x0003, 5},{0x001B, 6},{0x005A, 7},{0x0034, 7},
+    {0x0005, 6},{0x02DD,10},{0x0000, 4},{0x000C, 5}
+  },
+  {
+    {0x0003, 4},{0x007F, 7},{0x00A1, 8},{0x00A0, 8},
+    {0x020C,10},{0x0834,12},{0x106B,13},{0x0007, 4},
+    {0x0082, 8},{0x000E, 4},{0x000D, 4},{0x000B, 4},
+    {0x000C, 4},{0x0000, 3},{0x0009, 4},{0x0002, 4},
+    {0x0011, 5},{0x001E, 5},{0x0015, 5},{0x003E, 6},
+    {0x0040, 7},{0x041B,11},{0x106A,13},{0x0006, 4},
+    {0x000A, 5},{0x0029, 6},{0x007E, 7},{0x0051, 7},
+    {0x0021, 6},{0x0107, 9},{0x0004, 4},{0x000B, 5}
+  },
+  {
+    {0x0007, 4},{0x001B, 6},{0x00F6, 8},{0x00E9, 8},
+    {0x03A1,10},{0x0740,11},{0x0E82,12},{0x001F, 5},
+    {0x01EF, 9},{0x0001, 3},{0x0002, 3},{0x000B, 4},
+    {0x000C, 4},{0x000D, 4},{0x0008, 4},{0x001C, 5},
+    {0x0003, 5},{0x0012, 5},{0x0002, 5},{0x0075, 7},
+    {0x01D1, 9},{0x1D07,13},{0x1D06,13},{0x000A, 4},
+    {0x0013, 5},{0x003B, 6},{0x001A, 6},{0x007A, 7},
+    {0x003C, 6},{0x01EE, 9},{0x0000, 4},{0x000C, 5}
+  },
+  {
+    {0x000D, 4},{0x003D, 6},{0x0042, 7},{0x0037, 7},
+    {0x00D9, 9},{0x0362,11},{0x06C6,12},{0x001F, 5},
+    {0x0086, 8},{0x0001, 3},{0x0002, 3},{0x000C, 4},
+    {0x000B, 4},{0x000A, 4},{0x0001, 4},{0x000F, 5},
+    {0x0025, 6},{0x003C, 6},{0x001A, 6},{0x0087, 8},
+    {0x01B0,10},{0x0D8F,13},{0x0D8E,13},{0x000E, 4},
+    {0x0013, 5},{0x000C, 5},{0x0024, 6},{0x0020, 6},
+    {0x0011, 5},{0x006D, 8},{0x0000, 4},{0x000E, 5}
+  },
+  {
+    {0x0000, 3},{0x0012, 5},{0x0076, 7},{0x0077, 7},
+    {0x014D, 9},{0x0533,11},{0x14C9,13},{0x0013, 5},
+    {0x00A5, 8},{0x0002, 3},{0x0003, 3},{0x000B, 4},
+    {0x000C, 4},{0x0008, 4},{0x001A, 5},{0x002B, 6},
+    {0x0075, 7},{0x0074, 7},{0x00A7, 8},{0x0298,10},
+    {0x14C8,13},{0x14CB,13},{0x14CA,13},{0x000F, 4},
+    {0x001C, 5},{0x0007, 5},{0x002A, 6},{0x0028, 6},
+    {0x001B, 5},{0x00A4, 8},{0x0002, 4},{0x0006, 5}
+  },
+  {
+    {0x0002, 3},{0x001A, 5},{0x002B, 6},{0x003A, 6},
+    {0x00ED, 8},{0x0283,10},{0x0A0A,12},{0x0004, 5},
+    {0x00A1, 8},{0x0004, 3},{0x0003, 3},{0x000B, 4},
+    {0x000C, 4},{0x001F, 5},{0x0006, 5},{0x0077, 7},
+    {0x00A3, 8},{0x00A2, 8},{0x0140, 9},{0x1417,13},
+    {0x1416,13},{0x0A09,12},{0x0A08,12},{0x0000, 3},
+    {0x001E, 5},{0x0007, 5},{0x002A, 6},{0x0029, 6},
+    {0x001C, 5},{0x00EC, 8},{0x001B, 5},{0x0005, 5}
+  },
+  {
+    {0x0002, 3},{0x0002, 4},{0x0018, 5},{0x001D, 5},
+    {0x0035, 6},{0x00E4, 8},{0x01CF,11},{0x001D, 7},
+    {0x0072, 9},{0x0004, 3},{0x0005, 3},{0x0006, 4},
+    {0x0007, 4},{0x0006, 5},{0x0073, 7},{0x0038, 8},
+    {0x01CE,11},{0x039B,12},{0x0398,12},{0x0733,13},
+    {0x0732,13},{0x0735,13},{0x0734,13},{0x0000, 3},
+    {0x001F, 5},{0x001B, 5},{0x0034, 6},{0x000F, 6},
+    {0x001E, 5},{0x00E5, 8},{0x0019, 5},{0x0038, 6}
+  },
+  {
+    {0x0016, 5},{0x0050, 7},{0x0172, 9},{0x02E7,10},
+    {0x1732,13},{0x2E67,14},{0x2E66,14},{0x0006, 4},
+    {0x0051, 7},{0x0001, 3},{0x0000, 3},{0x000D, 4},
+    {0x000C, 4},{0x0009, 4},{0x001C, 5},{0x0009, 5},
+    {0x001C, 6},{0x001D, 6},{0x005D, 7},{0x00B8, 8},
+    {0x05CD,11},{0x1731,13},{0x1730,13},{0x000F, 4},
+    {0x0005, 4},{0x000F, 5},{0x0008, 5},{0x0029, 6},
+    {0x001D, 5},{0x002F, 6},{0x0008, 4},{0x0015, 5}
+  },
+  {
+    {0x0009, 4},{0x0021, 6},{0x0040, 7},{0x00AD, 8},
+    {0x02B0,10},{0x1589,13},{0x1588,13},{0x001C, 5},
+    {0x005F, 7},{0x0000, 3},{0x000F, 4},{0x000D, 4},
+    {0x000C, 4},{0x0006, 4},{0x0011, 5},{0x002A, 6},
+    {0x0057, 7},{0x005E, 7},{0x0041, 7},{0x0159, 9},
+    {0x0563,11},{0x158B,13},{0x158A,13},{0x0001, 3},
+    {0x0005, 4},{0x0014, 5},{0x003B, 6},{0x002E, 6},
+    {0x0004, 4},{0x003A, 6},{0x0007, 4},{0x0016, 5}
+  },
+  {
+    {0x000E, 4},{0x0007, 5},{0x0046, 7},{0x0045, 7},
+    {0x0064, 9},{0x032A,12},{0x0657,13},{0x0018, 5},
+    {0x000D, 6},{0x0000, 3},{0x000F, 4},{0x000A, 4},
+    {0x000B, 4},{0x001A, 5},{0x0036, 6},{0x0047, 7},
+    {0x0044, 7},{0x0018, 7},{0x0033, 8},{0x00CB,10},
+    {0x0656,13},{0x0329,12},{0x0328,12},{0x0002, 3},
+    {0x0006, 4},{0x0019, 5},{0x000E, 5},{0x0037, 6},
+    {0x0009, 4},{0x000F, 5},{0x0002, 4},{0x0010, 5}
+  },
+  {
+    {0x0003, 3},{0x0018, 5},{0x0023, 6},{0x0077, 7},
+    {0x0194, 9},{0x1956,13},{0x32AF,14},{0x003A, 6},
+    {0x0076, 7},{0x0002, 3},{0x0001, 3},{0x001F, 5},
+    {0x001E, 5},{0x0014, 5},{0x0022, 6},{0x0064, 7},
+    {0x0197, 9},{0x0196, 9},{0x032B,10},{0x0654,11},
+    {0x32AE,14},{0x1955,13},{0x1954,13},{0x0000, 3},
+    {0x0009, 4},{0x001C, 5},{0x0015, 5},{0x0010, 5},
+    {0x000D, 4},{0x0017, 5},{0x0016, 5},{0x0033, 6}
+  },
+  {
+    {0x0005, 3},{0x0006, 4},{0x003E, 6},{0x0010, 5},
+    {0x0048, 7},{0x093F,12},{0x24FA,14},{0x0032, 6},
+    {0x0067, 7},{0x0002, 3},{0x0001, 3},{0x001B, 5},
+    {0x001E, 5},{0x0034, 6},{0x0066, 7},{0x0092, 8},
+    {0x0126, 9},{0x024E,10},{0x049E,11},{0x49F7,15},
+    {0x49F6,15},{0x24F9,14},{0x24F8,14},{0x0000, 3},
+    {0x0007, 4},{0x0018, 5},{0x0011, 5},{0x003F, 6},
+    {0x000E, 4},{0x0013, 5},{0x0035, 6},{0x0025, 6}
+  },
+  {
+    {0x0005, 3},{0x0008, 4},{0x0012, 5},{0x001C, 5},
+    {0x001C, 6},{0x00EA, 9},{0x1D75,14},{0x001E, 6},
+    {0x0066, 7},{0x0001, 3},{0x0002, 3},{0x001B, 5},
+    {0x001A, 5},{0x001F, 6},{0x003B, 7},{0x0074, 8},
+    {0x01D6,10},{0x03AF,11},{0x1D74,14},{0x1D77,14},
+    {0x1D76,14},{0x0EB9,13},{0x0EB8,13},{0x000F, 4},
+    {0x0006, 4},{0x0013, 5},{0x003B, 6},{0x003A, 6},
+    {0x0000, 3},{0x0018, 5},{0x0032, 6},{0x0067, 7}
+  },
+  {
+    {0x0004, 3},{0x000A, 4},{0x001B, 5},{0x000C, 4},
+    {0x000D, 5},{0x00E6, 8},{0x0684,11},{0x0072, 7},
+    {0x00E7, 8},{0x0002, 3},{0x0001, 3},{0x0017, 5},
+    {0x0016, 5},{0x0018, 6},{0x00D1, 8},{0x01A0, 9},
+    {0x0686,11},{0x0D0F,12},{0x0D0A,12},{0x1A17,13},
+    {0x1A16,13},{0x1A1D,13},{0x1A1C,13},{0x000F, 4},
+    {0x001D, 5},{0x000E, 5},{0x0035, 6},{0x0038, 6},
+    {0x0000, 3},{0x000F, 5},{0x0019, 6},{0x0069, 7}
+  },
+  {
+    {0x0003, 3},{0x000C, 4},{0x001B, 5},{0x0000, 3},
+    {0x0003, 4},{0x002E, 6},{0x0051, 9},{0x00BC, 8},
+    {0x0053, 9},{0x0004, 3},{0x0002, 3},{0x0016, 5},
+    {0x0015, 5},{0x0015, 7},{0x0050, 9},{0x00A4,10},
+    {0x0294,12},{0x052B,13},{0x052A,13},{0x052D,13},
+    {0x052C,13},{0x052F,13},{0x052E,13},{0x000E, 4},
+    {0x001A, 5},{0x0004, 5},{0x0028, 6},{0x0029, 6},
+    {0x000F, 4},{0x000B, 6},{0x005F, 7},{0x00BD, 8}
+  },
+  {
+    {0x0003, 4},{0x0009, 6},{0x00D0, 8},{0x01A3, 9},
+    {0x0344,10},{0x0D14,12},{0x1A2B,13},{0x0004, 4},
+    {0x0015, 7},{0x0000, 3},{0x000F, 4},{0x000B, 4},
+    {0x000C, 4},{0x000E, 4},{0x0009, 4},{0x001B, 5},
+    {0x000A, 5},{0x0014, 5},{0x000D, 5},{0x002A, 6},
+    {0x0014, 7},{0x068B,11},{0x1A2A,13},{0x0008, 4},
+    {0x000B, 5},{0x002B, 6},{0x000B, 6},{0x0069, 7},
+    {0x0035, 6},{0x0008, 6},{0x0007, 4},{0x000C, 5}
+  },
+  {
+    {0x000A, 4},{0x003C, 6},{0x0032, 7},{0x0030, 7},
+    {0x00C5, 9},{0x0621,12},{0x0620,12},{0x001F, 5},
+    {0x0033, 7},{0x0001, 3},{0x0000, 3},{0x000E, 4},
+    {0x000D, 4},{0x000C, 4},{0x0004, 4},{0x000D, 5},
+    {0x0026, 6},{0x0027, 6},{0x0014, 6},{0x0063, 8},
+    {0x0189,10},{0x0623,12},{0x0622,12},{0x000B, 4},
+    {0x0012, 5},{0x003D, 6},{0x0022, 6},{0x0015, 6},
+    {0x000B, 5},{0x0023, 6},{0x0007, 4},{0x0010, 5}
+  },
+  {
+    {0x000F, 4},{0x000C, 5},{0x0043, 7},{0x0010, 6},
+    {0x0044, 8},{0x0114,10},{0x0455,12},{0x0018, 5},
+    {0x0023, 7},{0x0001, 3},{0x0000, 3},{0x000E, 4},
+    {0x000D, 4},{0x0009, 4},{0x0019, 5},{0x0009, 5},
+    {0x0017, 6},{0x0016, 6},{0x0042, 7},{0x008B, 9},
+    {0x0454,12},{0x0457,12},{0x0456,12},{0x000B, 4},
+    {0x0015, 5},{0x000A, 5},{0x0029, 6},{0x0020, 6},
+    {0x000D, 5},{0x0028, 6},{0x0007, 4},{0x0011, 5}
+  },
+  {
+    {0x0001, 3},{0x001A, 5},{0x0029, 6},{0x002A, 6},
+    {0x00A0, 8},{0x0285,10},{0x1425,13},{0x0002, 5},
+    {0x0000, 7},{0x0002, 3},{0x0003, 3},{0x000C, 4},
+    {0x000B, 4},{0x0008, 4},{0x0012, 5},{0x0001, 6},
+    {0x0051, 7},{0x0001, 7},{0x0143, 9},{0x0508,11},
+    {0x1424,13},{0x1427,13},{0x1426,13},{0x000F, 4},
+    {0x001C, 5},{0x0003, 5},{0x0037, 6},{0x002B, 6},
+    {0x0013, 5},{0x0036, 6},{0x001D, 5},{0x0001, 5}
+  },
+  {
+    {0x0004, 3},{0x001F, 5},{0x003D, 6},{0x0006, 5},
+    {0x0016, 7},{0x0053, 9},{0x014A,11},{0x0034, 6},
+    {0x002A, 8},{0x0002, 3},{0x0003, 3},{0x000B, 4},
+    {0x000C, 4},{0x001C, 5},{0x0037, 6},{0x0017, 7},
+    {0x002B, 8},{0x0028, 8},{0x00A4,10},{0x052D,13},
+    {0x052C,13},{0x052F,13},{0x052E,13},{0x0000, 3},
+    {0x001D, 5},{0x0007, 5},{0x0004, 5},{0x0035, 6},
+    {0x0014, 5},{0x0036, 6},{0x0015, 5},{0x003C, 6}
+  },
+  {
+    {0x0004, 3},{0x000A, 4},{0x0007, 5},{0x001D, 5},
+    {0x0009, 6},{0x01F3, 9},{0x07C7,11},{0x0008, 6},
+    {0x01F0, 9},{0x0003, 3},{0x0002, 3},{0x000D, 4},
+    {0x000C, 4},{0x0017, 5},{0x007D, 7},{0x01F2, 9},
+    {0x07C6,11},{0x07C5,11},{0x1F12,13},{0x3E27,14},
+    {0x3E26,14},{0x1F11,13},{0x1F10,13},{0x0000, 3},
+    {0x001E, 5},{0x0006, 5},{0x0039, 6},{0x0038, 6},
+    {0x003F, 6},{0x002C, 6},{0x0005, 5},{0x002D, 6}
+  },
+  {
+    {0x0002, 3},{0x0007, 4},{0x0018, 5},{0x0003, 4},
+    {0x0005, 5},{0x0035, 7},{0x004F, 9},{0x0012, 7},
+    {0x04E5,13},{0x0005, 3},{0x0004, 3},{0x000D, 4},
+    {0x000E, 4},{0x0033, 6},{0x0026, 8},{0x009D,10},
+    {0x04E4,13},{0x04E7,13},{0x04E6,13},{0x04E1,13},
+    {0x04E0,13},{0x04E3,13},{0x04E2,13},{0x0000, 3},
+    {0x001F, 5},{0x000C, 5},{0x003D, 6},{0x003C, 6},
+    {0x0032, 6},{0x0034, 7},{0x001B, 6},{0x0008, 6}
+  },
+  {
+    {0x0000, 3},{0x0004, 4},{0x001C, 5},{0x000F, 4},
+    {0x0002, 4},{0x0007, 5},{0x0075, 7},{0x00E8, 8},
+    {0x1D2A,13},{0x0005, 3},{0x0004, 3},{0x000D, 4},
+    {0x000C, 4},{0x0077, 7},{0x0E96,12},{0x3A57,14},
+    {0x3A56,14},{0x3A5D,14},{0x3A5C,14},{0x3A5F,14},
+    {0x3A5E,14},{0x1D29,13},{0x1D28,13},{0x0003, 3},
+    {0x0006, 5},{0x000A, 5},{0x002C, 7},{0x0017, 6},
+    {0x0076, 7},{0x01D3, 9},{0x03A4,10},{0x002D, 7}
+  },
+  {
+    {0x000A, 4},{0x0024, 6},{0x00BF, 8},{0x0085, 8},
+    {0x0211,10},{0x0842,12},{0x1087,13},{0x0018, 5},
+    {0x0020, 6},{0x0001, 3},{0x0002, 3},{0x000E, 4},
+    {0x000D, 4},{0x0007, 4},{0x0013, 5},{0x0025, 6},
+    {0x005E, 7},{0x0043, 7},{0x00BE, 8},{0x0109, 9},
+    {0x1086,13},{0x0841,12},{0x0840,12},{0x000F, 4},
+    {0x0001, 4},{0x0011, 5},{0x0000, 5},{0x002E, 6},
+    {0x0019, 5},{0x0001, 5},{0x0006, 4},{0x0016, 5}
+  },
+  {
+    {0x0002, 3},{0x000F, 5},{0x006F, 7},{0x0061, 7},
+    {0x0374,10},{0x1BA8,13},{0x3753,14},{0x0012, 5},
+    {0x0036, 6},{0x0000, 3},{0x0001, 3},{0x000A, 4},
+    {0x000B, 4},{0x001A, 5},{0x0031, 6},{0x0060, 7},
+    {0x00DC, 8},{0x01BB, 9},{0x06EB,11},{0x1BAB,13},
+    {0x3752,14},{0x3755,14},{0x3754,14},{0x000E, 4},
+    {0x0006, 4},{0x0013, 5},{0x000E, 5},{0x003E, 6},
+    {0x0008, 4},{0x001E, 5},{0x0019, 5},{0x003F, 6}
+  },
+  {
+    {0x0003, 3},{0x001C, 5},{0x0025, 6},{0x0024, 6},
+    {0x01DA, 9},{0x1DBD,13},{0x3B7C,14},{0x003C, 6},
+    {0x003D, 6},{0x0000, 3},{0x0001, 3},{0x000B, 4},
+    {0x000A, 4},{0x000B, 5},{0x0077, 7},{0x00EC, 8},
+    {0x03B6,10},{0x076E,11},{0x1DBF,13},{0x76FB,15},
+    {0x76FA,15},{0x3B79,14},{0x3B78,14},{0x000D, 4},
+    {0x001F, 5},{0x0013, 5},{0x000A, 5},{0x0008, 5},
+    {0x000C, 4},{0x0008, 4},{0x0009, 5},{0x003A, 6}
+  },
+  {
+    {0x0005, 3},{0x0003, 4},{0x0004, 5},{0x0010, 5},
+    {0x008F, 8},{0x0475,11},{0x11D1,13},{0x0079, 7},
+    {0x0027, 6},{0x0002, 3},{0x0003, 3},{0x0001, 4},
+    {0x0000, 4},{0x0026, 6},{0x0046, 7},{0x011C, 9},
+    {0x0477,11},{0x08ED,12},{0x11D0,13},{0x11D3,13},
+    {0x11D2,13},{0x11D9,13},{0x11D8,13},{0x000D, 4},
+    {0x001F, 5},{0x0012, 5},{0x0005, 5},{0x003D, 6},
+    {0x000C, 4},{0x000E, 4},{0x0022, 6},{0x0078, 7}
+  },
+  {
+    {0x0005, 3},{0x000C, 4},{0x001B, 5},{0x0000, 4},
+    {0x0006, 6},{0x03E2,10},{0x3E3D,14},{0x000F, 7},
+    {0x0034, 6},{0x0003, 3},{0x0002, 3},{0x001E, 5},
+    {0x001D, 5},{0x007D, 7},{0x01F0, 9},{0x07C6,11},
+    {0x3E3C,14},{0x3E3F,14},{0x3E3E,14},{0x3E39,14},
+    {0x3E38,14},{0x3E3B,14},{0x3E3A,14},{0x0008, 4},
+    {0x001C, 5},{0x0002, 5},{0x003F, 6},{0x0035, 6},
+    {0x0009, 4},{0x0001, 3},{0x000E, 7},{0x00F9, 8}
+  },
+  {
+    {0x0004, 3},{0x000B, 4},{0x0001, 4},{0x000A, 4},
+    {0x001E, 6},{0x00E0, 9},{0x0E1E,13},{0x0071, 8},
+    {0x0039, 7},{0x0007, 3},{0x0006, 3},{0x000D, 5},
+    {0x000C, 5},{0x0020, 7},{0x01C2,10},{0x1C3F,14},
+    {0x1C3E,14},{0x0E19,13},{0x0E18,13},{0x0E1B,13},
+    {0x0E1A,13},{0x0E1D,13},{0x0E1C,13},{0x0000, 4},
+    {0x0009, 5},{0x001D, 6},{0x001F, 6},{0x0011, 6},
+    {0x0005, 4},{0x0001, 3},{0x0043, 8},{0x0042, 8}
+  },
+  {
+    {0x0004, 3},{0x000D, 4},{0x0007, 4},{0x0002, 3},
+    {0x0014, 5},{0x016C, 9},{0x16D1,13},{0x02DF,10},
+    {0x016E, 9},{0x0000, 2},{0x0007, 3},{0x002C, 6},
+    {0x002B, 6},{0x02DE,10},{0x16D0,13},{0x16D3,13},
+    {0x16D2,13},{0x2DB5,14},{0x2DB4,14},{0x2DB7,14},
+    {0x2DB6,14},{0x16D9,13},{0x16D8,13},{0x000C, 5},
+    {0x002A, 6},{0x005A, 7},{0x001B, 6},{0x001A, 6},
+    {0x0017, 5},{0x000C, 4},{0x05B7,11},{0x05B5,11}
+  },
+  {
+    {0x0002, 2},{0x000F, 4},{0x001C, 5},{0x000C, 4},
+    {0x003B, 6},{0x01AC, 9},{0x1AD8,13},{0x35B3,14},
+    {0x35B2,14},{0x0001, 2},{0x0000, 2},{0x0069, 7},
+    {0x0068, 7},{0x35BD,14},{0x35BC,14},{0x35BF,14},
+    {0x35BE,14},{0x35B9,14},{0x35B8,14},{0x35BB,14},
+    {0x35BA,14},{0x35B5,14},{0x35B4,14},{0x01A9, 9},
+    {0x01A8, 9},{0x035A,10},{0x00D7, 8},{0x00D5, 8},
+    {0x003A, 6},{0x001B, 5},{0x35B7,14},{0x35B6,14}
+  },
+  {
+    {0x0000, 3},{0x0010, 5},{0x0072, 7},{0x0071, 7},
+    {0x0154, 9},{0x0AAB,12},{0x0AA8,12},{0x0014, 5},
+    {0x0070, 7},{0x0002, 3},{0x0003, 3},{0x000C, 4},
+    {0x000B, 4},{0x0003, 4},{0x0011, 5},{0x0073, 7},
+    {0x0054, 7},{0x00AB, 8},{0x02AB,10},{0x1553,13},
+    {0x1552,13},{0x1555,13},{0x1554,13},{0x000D, 4},
+    {0x001E, 5},{0x0012, 5},{0x003E, 6},{0x002B, 6},
+    {0x0002, 4},{0x003F, 6},{0x001D, 5},{0x0013, 5}
+  },
+  {
+    {0x0003, 3},{0x001F, 5},{0x0029, 6},{0x003D, 6},
+    {0x000C, 7},{0x0069,10},{0x0345,13},{0x0002, 5},
+    {0x0028, 6},{0x0002, 3},{0x0001, 3},{0x000E, 4},
+    {0x000C, 4},{0x0015, 5},{0x0007, 6},{0x001B, 8},
+    {0x006B,10},{0x006A,10},{0x0344,13},{0x0347,13},
+    {0x0346,13},{0x01A1,12},{0x01A0,12},{0x000B, 4},
+    {0x001A, 5},{0x0012, 5},{0x0000, 5},{0x003C, 6},
+    {0x0008, 4},{0x001B, 5},{0x0013, 5},{0x0001, 5}
+  },
+  {
+    {0x0004, 3},{0x0004, 4},{0x003F, 6},{0x0014, 5},
+    {0x0056, 7},{0x015C, 9},{0x15D5,13},{0x003C, 6},
+    {0x002A, 6},{0x0000, 3},{0x0001, 3},{0x000E, 4},
+    {0x000D, 4},{0x000C, 5},{0x00AF, 8},{0x02BB,10},
+    {0x15D4,13},{0x15D7,13},{0x15D6,13},{0x15D1,13},
+    {0x15D0,13},{0x15D3,13},{0x15D2,13},{0x000B, 4},
+    {0x0019, 5},{0x000D, 5},{0x003E, 6},{0x0031, 6},
+    {0x0007, 4},{0x0005, 4},{0x003D, 6},{0x0030, 6}
+  },
+  {
+    {0x0005, 3},{0x0008, 4},{0x001A, 5},{0x0000, 4},
+    {0x0036, 6},{0x0011, 8},{0x0106,12},{0x000A, 7},
+    {0x006E, 7},{0x0002, 3},{0x0003, 3},{0x0003, 4},
+    {0x0002, 4},{0x006F, 7},{0x0021, 9},{0x020F,13},
+    {0x020E,13},{0x0101,12},{0x0100,12},{0x0103,12},
+    {0x0102,12},{0x0105,12},{0x0104,12},{0x000C, 4},
+    {0x001E, 5},{0x0003, 5},{0x003E, 6},{0x003F, 6},
+    {0x0009, 4},{0x000E, 4},{0x000B, 7},{0x0009, 7}
+  },
+  {
+    {0x0002, 3},{0x000E, 4},{0x001E, 5},{0x000C, 4},
+    {0x001F, 5},{0x006E, 7},{0x00AD,10},{0x00AF,10},
+    {0x0014, 7},{0x0004, 3},{0x0003, 3},{0x001A, 5},
+    {0x0017, 5},{0x002A, 8},{0x0576,13},{0x0AEF,14},
+    {0x0AEE,14},{0x0571,13},{0x0570,13},{0x0573,13},
+    {0x0572,13},{0x0575,13},{0x0574,13},{0x0003, 4},
+    {0x0016, 5},{0x0004, 5},{0x0036, 6},{0x000B, 6},
+    {0x000A, 4},{0x0000, 3},{0x006F, 7},{0x00AC,10}
+  },
+  {
+    {0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3},
+    {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13},
+    {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6},
+    {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13},
+    {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13},
+    {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3},
+    {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8},
+    {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14}
+  },
+  {
+    {0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3},
+    {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13},
+    {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6},
+    {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13},
+    {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13},
+    {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3},
+    {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8},
+    {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14}
+  },
+  {
+    {0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3},
+    {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13},
+    {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6},
+    {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13},
+    {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13},
+    {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3},
+    {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8},
+    {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14}
+  },
+  {
+    {0x0003, 3},{0x0011, 5},{0x0020, 6},{0x0074, 7},
+    {0x010D, 9},{0x0863,12},{0x0860,12},{0x000A, 5},
+    {0x0075, 7},{0x0001, 3},{0x0000, 3},{0x000B, 4},
+    {0x000A, 4},{0x0018, 5},{0x0038, 6},{0x0042, 7},
+    {0x010F, 9},{0x010E, 9},{0x0219,10},{0x10C3,13},
+    {0x10C2,13},{0x10C5,13},{0x10C4,13},{0x000F, 4},
+    {0x0004, 4},{0x0019, 5},{0x000B, 5},{0x0039, 6},
+    {0x0009, 4},{0x001B, 5},{0x001A, 5},{0x003B, 6}
+  },
+  {
+    {0x0005, 3},{0x0001, 4},{0x003E, 6},{0x0001, 5},
+    {0x00E2, 8},{0x1C6F,13},{0x38D9,14},{0x0039, 6},
+    {0x001F, 6},{0x0002, 3},{0x0001, 3},{0x0009, 4},
+    {0x0008, 4},{0x0000, 5},{0x0070, 7},{0x01C7, 9},
+    {0x038C,10},{0x071A,11},{0x38D8,14},{0x38DB,14},
+    {0x38DA,14},{0x38DD,14},{0x38DC,14},{0x000D, 4},
+    {0x001D, 5},{0x000E, 5},{0x003F, 6},{0x003C, 6},
+    {0x000C, 4},{0x0006, 4},{0x003D, 6},{0x001E, 6}
+  },
+  {
+    {0x0006, 3},{0x000B, 4},{0x0011, 5},{0x001E, 5},
+    {0x0074, 7},{0x03AA,10},{0x1D5C,13},{0x0001, 6},
+    {0x0021, 6},{0x0001, 3},{0x0002, 3},{0x0007, 4},
+    {0x0006, 4},{0x003E, 6},{0x00EB, 8},{0x01D4, 9},
+    {0x0EAF,12},{0x3ABB,14},{0x3ABA,14},{0x1D59,13},
+    {0x1D58,13},{0x1D5B,13},{0x1D5A,13},{0x000A, 4},
+    {0x001C, 5},{0x0001, 5},{0x003F, 6},{0x003B, 6},
+    {0x0001, 4},{0x0009, 4},{0x0020, 6},{0x0000, 6}
+  },
+  {
+    {0x0004, 3},{0x000A, 4},{0x0017, 5},{0x0004, 4},
+    {0x0016, 6},{0x016A, 9},{0x16B1,13},{0x0017, 7},
+    {0x005B, 7},{0x0006, 3},{0x0007, 3},{0x0001, 4},
+    {0x0000, 4},{0x000A, 6},{0x02D7,10},{0x0B5A,12},
+    {0x16B0,13},{0x16B3,13},{0x16B2,13},{0x2D6D,14},
+    {0x2D6C,14},{0x2D6F,14},{0x2D6E,14},{0x0006, 4},
+    {0x000A, 5},{0x0004, 5},{0x002C, 6},{0x0017, 6},
+    {0x0003, 4},{0x0007, 4},{0x0016, 7},{0x00B4, 8}
+  },
+  {
+    {0x0005, 3},{0x000D, 4},{0x0005, 4},{0x0009, 4},
+    {0x0033, 6},{0x0193, 9},{0x192C,13},{0x0061, 8},
+    {0x0031, 7},{0x0000, 2},{0x0007, 3},{0x0010, 5},
+    {0x0011, 5},{0x00C8, 8},{0x192F,13},{0x325B,14},
+    {0x325A,14},{0x1929,13},{0x1928,13},{0x192B,13},
+    {0x192A,13},{0x325D,14},{0x325C,14},{0x0018, 5},
+    {0x001A, 6},{0x001B, 6},{0x0065, 7},{0x0019, 6},
+    {0x0004, 4},{0x0007, 4},{0x0060, 8},{0x0324,10}
+  },
+  {
+    {0x0006, 3},{0x0000, 3},{0x0002, 4},{0x000F, 4},
+    {0x0039, 6},{0x01D9, 9},{0x1D82,13},{0x0761,11},
+    {0x03BE,10},{0x0001, 2},{0x0002, 2},{0x000F, 6},
+    {0x000E, 6},{0x0762,11},{0x3B07,14},{0x3B06,14},
+    {0x3B1D,14},{0x3B1C,14},{0x3B1F,14},{0x3B1E,14},
+    {0x3B19,14},{0x3B18,14},{0x3B1B,14},{0x0038, 6},
+    {0x01DE, 9},{0x00ED, 8},{0x03BF,10},{0x00EE, 8},
+    {0x003A, 6},{0x0006, 5},{0x0EC0,12},{0x3B1A,14}
+  },
+  {
+    {0x0000, 2},{0x0002, 3},{0x000F, 5},{0x0006, 4},
+    {0x001C, 6},{0x01D0,10},{0x0E8C,13},{0x1D1B,14},
+    {0x1D1A,14},{0x0003, 2},{0x0002, 2},{0x00EA, 9},
+    {0x00E9, 9},{0x0E89,13},{0x0E88,13},{0x0E8B,13},
+    {0x0E8A,13},{0x1D65,14},{0x1D64,14},{0x1D67,14},
+    {0x1D66,14},{0x1D61,14},{0x1D60,14},{0x03AD,11},
+    {0x1D63,14},{0x1D62,14},{0x1D1D,14},{0x1D1C,14},
+    {0x003B, 7},{0x01D7,10},{0x1D1F,14},{0x1D1E,14}
+  },
+  {
+    {0x0002, 2},{0x000F, 4},{0x001C, 5},{0x000C, 4},
+    {0x003B, 6},{0x01AC, 9},{0x1AD8,13},{0x35B3,14},
+    {0x35B2,14},{0x0001, 2},{0x0000, 2},{0x0069, 7},
+    {0x0068, 7},{0x35BD,14},{0x35BC,14},{0x35BF,14},
+    {0x35BE,14},{0x35B9,14},{0x35B8,14},{0x35BB,14},
+    {0x35BA,14},{0x35B5,14},{0x35B4,14},{0x01A9, 9},
+    {0x01A8, 9},{0x035A,10},{0x00D7, 8},{0x00D5, 8},
+    {0x003A, 6},{0x001B, 5},{0x35B7,14},{0x35B6,14}
+  }
+};
+
+
+
+/*A description of a Huffman code value used when encoding the tree.
+  oc_huff_codes_pack() fills in one of these per token, sorts them by pattern,
+   and then walks them in order to emit the tree.*/
+typedef struct{
+  /*The bit pattern, left-shifted so that the MSB of all patterns is
+     aligned.
+    This is the key huff_entry_cmp() sorts on.*/
+  ogg_uint32_t pattern;
+  /*The amount the bit pattern was shifted: the longest code length in the
+     table minus the length of this code.*/
+  int          shift;
+  /*The token this bit pattern represents (its index in the flat code
+     table).*/
+  int          token;
+}oc_huff_entry;
+
+
+
+/*qsort() comparison callback that orders oc_huff_entry structures by their
+   aligned bit patterns.
+  _c1: The first entry to compare.
+  _c2: The second entry to compare.
+  Return: -1 if _c1's pattern is smaller than _c2's, 1 if it is larger, and 0
+   if the two patterns are equal.*/
+static int huff_entry_cmp(const void *_c1,const void *_c2){
+  const oc_huff_entry *lhs;
+  const oc_huff_entry *rhs;
+  lhs=(const oc_huff_entry *)_c1;
+  rhs=(const oc_huff_entry *)_c2;
+  /*Compare explicitly instead of subtracting: the patterns are unsigned.*/
+  if(lhs->pattern<rhs->pattern)return -1;
+  if(lhs->pattern>rhs->pattern)return 1;
+  return 0;
+}
+
+/*Encodes a description of the given Huffman tables.
+  Although the codes are stored in the encoder as flat arrays, in the bit
+   stream and in the decoder they are structured as a tree.
+  This function recovers the tree structure from the flat array and then
+   writes it out.
+  Note that the codes MUST form a Huffman code, and not merely a prefix-free
+   code, since the binary tree is assumed to be full.
+  On failure, whatever was written to _opb before the error was detected is
+   left in the buffer.
+  _opb:   The buffer to store the tree in.
+  _codes: The Huffman tables to pack.
+  Return: 0 on success, or a negative value (TH_EINVAL) if one of the given
+   Huffman tables does not form a full, prefix-free code, or contains a code
+   longer than 32 bits.*/
+int oc_huff_codes_pack(oggpack_buffer *_opb,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
+    oc_huff_entry entries[TH_NDCT_TOKENS];
+    int           bpos;
+    int           maxlen;
+    int           mask;
+    int           j;
+    /*First, find the maximum code length so we can align all the bit
+       patterns.*/
+    maxlen=_codes[i][0].nbits;
+    for(j=1;j<TH_NDCT_TOKENS;j++){
+      maxlen=OC_MAXI(_codes[i][j].nbits,maxlen);
+    }
+    /*The aligned patterns are stored in 32-bit words, so a longer code cannot
+       be represented, and the shifts below would have undefined behavior.
+      Reject such a table up front instead of relying on the validation
+       below to catch it.*/
+    if(maxlen>32)return TH_EINVAL;
+    /*Build a mask of the low maxlen bits.
+      The shift is split into two halves that add up to maxlen so that no
+       single shift count reaches the full word width.*/
+    mask=(1<<(maxlen>>1)<<(maxlen+1>>1))-1;
+    /*Copy over the codes into our temporary workspace.
+      The bit patterns are aligned, and the original entry each code is from
+       is stored as well.*/
+    for(j=0;j<TH_NDCT_TOKENS;j++){
+      entries[j].shift=maxlen-_codes[i][j].nbits;
+      entries[j].pattern=_codes[i][j].pattern<<entries[j].shift&mask;
+      entries[j].token=j;
+    }
+    /*Sort the codes into ascending order.
+      This is the order the leaves of the tree will be traversed.*/
+    qsort(entries,TH_NDCT_TOKENS,sizeof(entries[0]),huff_entry_cmp);
+    /*For each leaf of the tree:
+      bpos tracks our current depth as a bit position in the aligned pattern:
+       maxlen is the root, and it decreases by one for each branch taken.*/
+    bpos=maxlen;
+    for(j=0;j<TH_NDCT_TOKENS;j++){
+      int bit;
+      /*If this code has any bits at all.*/
+      if(entries[j].shift<maxlen){
+        /*Descend into the tree, writing a bit for each branch.*/
+        for(;bpos>entries[j].shift;bpos--)oggpackB_write(_opb,0,1);
+        /*Mark this as a leaf node, and write its value.*/
+        oggpackB_write(_opb,1,1);
+        oggpackB_write(_opb,entries[j].token,5);
+        /*For each 1 branch we've descended, back up the tree until we reach a
+           0 branch.*/
+        bit=1<<bpos;
+        for(;entries[j].pattern&bit;bpos++)bit<<=1;
+        /*Validate the code.*/
+        if(j+1<TH_NDCT_TOKENS){
+          /*Select every bit strictly above the 0 branch we stopped at.*/
+          mask=~(bit-1)<<1;
+          /*The next entry should have a 1 bit where we had a 0, and should
+             match our code above that bit.
+            This verifies both fullness and prefix-freeness simultaneously.*/
+          if(!(entries[j+1].pattern&bit)||
+           (entries[j].pattern&mask)!=(entries[j+1].pattern&mask)){
+            return TH_EINVAL;
+          }
+        }
+        /*If there are no more codes, we should have ascended back to the top
+           of the tree.*/
+        else if(bpos<maxlen)return TH_EINVAL;
+      }
+    }
+  }
+  return 0;
+}

Copied: trunk/theora/lib/huffenc.h (from rev 16442, trunk/theora/lib/enc/huffenc.h)
===================================================================
--- trunk/theora/lib/huffenc.h	                        (rev 0)
+++ trunk/theora/lib/huffenc.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,19 @@
+#if !defined(_huffenc_H)
+# define _huffenc_H (1)
+# include "huffman.h"
+
+
+
+/*A single Huffman table: one code for each of the TH_NDCT_TOKENS tokens.*/
+typedef th_huff_code                  th_huff_table[TH_NDCT_TOKENS];
+
+
+
+/*A complete set of TH_NHUFFMAN_TABLES Huffman tables, defined outside this
+   header.
+  NOTE(review): by its name this is presumably the code set used by VP3.1;
+   confirm against the definition.*/
+extern const th_huff_code
+ TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
+
+
+
+/*Writes a description of the given Huffman tables to _opb.
+  Return: 0 on success, or a negative value if one of the tables does not
+   form a full, prefix-free code.*/
+int oc_huff_codes_pack(oggpack_buffer *_opb,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]);
+
+#endif

Copied: trunk/theora/lib/huffman.h (from rev 16442, trunk/theora/lib/dec/huffman.h)
===================================================================
--- trunk/theora/lib/huffman.h	                        (rev 0)
+++ trunk/theora/lib/huffman.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,70 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+/*Include guard.
+  The macro defined here must be spelled exactly like the one tested, or the
+   guard never takes effect.*/
+#if !defined(_huffman_H)
+# define _huffman_H (1)
+# include "theora/codec.h"
+# include "ocintrin.h"
+
+/*The range of valid quantized DCT coefficient values.
+  VP3 used 511 in the encoder, but the bitstream is capable of 580.*/
+#define OC_DCT_VAL_RANGE         (580)
+
+/*The number of bits used to store a DCT token value.*/
+#define OC_NDCT_TOKEN_BITS       (5)
+
+/*The DCT token values.
+  A name that is followed by a gap in the numbering (e.g., OC_DCT_VAL_CAT2 or
+   OC_DCT_RUN_CAT1A) is the first token of a group; the matching *_MAX
+   constant below is one past the last token of that group.*/
+#define OC_DCT_EOB1_TOKEN        (0)
+#define OC_DCT_EOB2_TOKEN        (1)
+#define OC_DCT_EOB3_TOKEN        (2)
+#define OC_DCT_REPEAT_RUN0_TOKEN (3)
+#define OC_DCT_REPEAT_RUN1_TOKEN (4)
+#define OC_DCT_REPEAT_RUN2_TOKEN (5)
+#define OC_DCT_REPEAT_RUN3_TOKEN (6)
+
+#define OC_DCT_SHORT_ZRL_TOKEN   (7)
+#define OC_DCT_ZRL_TOKEN         (8)
+
+#define OC_ONE_TOKEN             (9)
+#define OC_MINUS_ONE_TOKEN       (10)
+#define OC_TWO_TOKEN             (11)
+#define OC_MINUS_TWO_TOKEN       (12)
+
+#define OC_DCT_VAL_CAT2          (13)
+#define OC_DCT_VAL_CAT3          (17)
+#define OC_DCT_VAL_CAT4          (18)
+#define OC_DCT_VAL_CAT5          (19)
+#define OC_DCT_VAL_CAT6          (20)
+#define OC_DCT_VAL_CAT7          (21)
+#define OC_DCT_VAL_CAT8          (22)
+
+#define OC_DCT_RUN_CAT1A         (23)
+#define OC_DCT_RUN_CAT1B         (28)
+#define OC_DCT_RUN_CAT1C         (29)
+#define OC_DCT_RUN_CAT2A         (30)
+#define OC_DCT_RUN_CAT2B         (31)
+
+/*Exclusive upper bounds of the token groups above.*/
+#define OC_NDCT_EOB_TOKEN_MAX    (7)
+#define OC_NDCT_ZRL_TOKEN_MAX    (9)
+#define OC_NDCT_VAL_MAX          (23)
+#define OC_NDCT_VAL_CAT1_MAX     (13)
+#define OC_NDCT_VAL_CAT2_MAX     (17)
+/*The number of tokens in the OC_DCT_VAL_CAT2 group.*/
+#define OC_NDCT_VAL_CAT2_SIZE    (OC_NDCT_VAL_CAT2_MAX-OC_DCT_VAL_CAT2)
+#define OC_NDCT_RUN_MAX          (32)
+#define OC_NDCT_RUN_CAT1A_MAX    (28)
+
+/*A per-token table, defined outside this header.
+  NOTE(review): by its name this presumably holds the number of extra bits
+   that follow each token in the bitstream; confirm against the definition.*/
+extern const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS];
+
+#endif

Copied: trunk/theora/lib/idct.c (from rev 16442, trunk/theora/lib/dec/idct.c)
===================================================================
--- trunk/theora/lib/idct.c	                        (rev 0)
+++ trunk/theora/lib/idct.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,335 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <string.h>
+#include "internal.h"
+#include "dct.h"
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  Every product with an OC_CxSy constant is immediately shifted right by 16
+   bits; the constants come from dct.h and are presumably 16-bit fixed-point
+   cosine/sine values (TODO confirm against dct.h).
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      The first 8 entries are used (e.g., from a row of an 8x8 block).*/
+static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  /*0-1 butterfly.
+    The sum and difference are truncated to 16 bits before the multiply.*/
+  t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
+  t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
+  /*2-3 rotation by 6pi/16.*/
+  t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
+  t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
+  /*4-7 rotation by 7pi/16.*/
+  t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
+  /*5-6 rotation by 3pi/16.*/
+  t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
+  t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
+  t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
+  /*Stage 2:*/
+  /*4-5 butterfly.
+    r holds the sum while t[5] is overwritten with the scaled difference.*/
+  r=t[4]+t[5];
+  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
+  t[4]=r;
+  /*7-6 butterfly.*/
+  r=t[7]+t[6];
+  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
+  t[7]=r;
+  /*Stage 3:*/
+  /*0-3 butterfly.*/
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  /*1-2 butterfly.*/
+  r=t[1]+t[2];
+  t[2]=t[1]-t[2];
+  t[1]=r;
+  /*6-5 butterfly.*/
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:
+    Each butterfly produces one output in the first half and its mirror in
+     the second half; results are stored with a stride of 8.*/
+  /*0-7 butterfly.*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  /*1-6 butterfly.*/
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  /*2-5 butterfly.*/
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  /*3-4 butterfly.*/
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  This is idct8() with every term that involves _x[4] through _x[7] dropped.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 4 entries are used.
+      The other 4 are assumed to be 0.*/
+static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:
+    t[1] is not computed: with _x[4] equal to 0 it would have the same value
+     as t[0], which is used in its place in stage 3.*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[2]=OC_C6S2*_x[2]>>16;
+  t[3]=OC_C2S6*_x[2]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[5]=-(OC_C5S3*_x[3]>>16);
+  t[6]=OC_C3S5*_x[3]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:
+    The 4-5 and 7-6 butterflies, as in idct8().*/
+  r=t[4]+t[5];
+  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
+  t[4]=r;
+  r=t[7]+t[6];
+  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
+  t[7]=r;
+  /*Stage 3:
+    The 1-2 butterfly reads t[0] where idct8() reads t[1], so it must run
+     before the 0-3 butterfly overwrites t[0].*/
+  t[1]=t[0]+t[2];
+  t[2]=t[0]-t[2];
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:
+    Identical to the final stage of idct8().*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  This is idct8() with every term that involves _x[3] through _x[7] dropped.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 3 entries are used.
+      The other 5 are assumed to be 0.*/
+static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:
+    t[1], t[5] and t[6] are not computed here: t[1] would equal t[0], and the
+     stage 1 values of t[5] and t[6] would be 0.*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[2]=OC_C6S2*_x[2]>>16;
+  t[3]=OC_C2S6*_x[2]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:
+    With the stage 1 values of t[5] and t[6] equal to 0, the two butterflies
+     leave t[4] and t[7] unchanged and reduce to these two scalings.
+    Unlike idct8(), the operand is not truncated to 16 bits before the
+     multiply.*/
+  t[5]=OC_C4S4*t[4]>>16;
+  t[6]=OC_C4S4*t[7]>>16;
+  /*Stage 3:
+    The 1-2 butterfly reads t[0] where idct8() reads t[1], so it must run
+     before the 0-3 butterfly overwrites t[0].*/
+  t[1]=t[0]+t[2];
+  t[2]=t[0]-t[2];
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:
+    Identical to the final stage of idct8().*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  This is idct8() with every term that involves _x[2] through _x[7] dropped.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 2 entries are used.
+      The other 6 are assumed to be 0.*/
+static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:
+    Only t[0], t[4] and t[7] depend on _x[0] and _x[1].*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:
+    The two butterflies reduce to scalings of t[4] and t[7].*/
+  t[5]=OC_C4S4*t[4]>>16;
+  t[6]=OC_C4S4*t[7]>>16;
+  /*Stage 3:
+    Only the 6-5 butterfly remains; t[0] stands in for t[1], t[2] and t[3]
+     in stage 4.*/
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[0]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[0]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[0]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[0]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[0]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[0]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform of a DC-only input.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  With a single non-zero coefficient, all 8 outputs have the same value.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first entry is used.
+      The other 7 are assumed to be 0.*/
+static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
+  ogg_int16_t dc;
+  int         i;
+  /*Scale the lone coefficient once...*/
+  dc=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
+  /*...and replicate it down the output column.*/
+  for(i=0;i<8;i++)_y[i<<3]=dc;
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  All coefficients but the first 3 in zig-zag scan order are assumed to be 0:
+   x  x  0  0  0  0  0  0
+   x  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients.*/
+static void oc_idct8x8_3(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  ogg_int16_t w[64];
+  int         i;
+  /*First pass: rows of x become columns of w.
+    Only the first two rows can hold non-zero values, so only the first two
+     columns of w are filled in.*/
+  idct8_2(w,_x);
+  idct8_1(w+1,_x+8);
+  /*Second pass: rows of w become columns of y.
+    Each row of w has at most two non-zero entries.*/
+  for(i=0;i<8;i++)idct8_2(_y+i,w+(i<<3));
+  /*Remove the accumulated scale factor, rounding to nearest.*/
+  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)((_y[i]+8)>>4);
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  All coefficients but the first 10 in zig-zag scan order are assumed to be 0:
+   x  x  x  x  0  0  0  0
+   x  x  x  0  0  0  0  0
+   x  x  0  0  0  0  0  0
+   x  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients.*/
+static void oc_idct8x8_10(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  ogg_int16_t w[64];
+  int         i;
+  /*First pass: rows of x become columns of w.
+    Row k of the input has at most 4-k non-zero entries, so each row gets the
+     smallest transform that covers it; only the first four columns of w are
+     filled in.*/
+  idct8_4(w,_x);
+  idct8_3(w+1,_x+8);
+  idct8_2(w+2,_x+16);
+  idct8_1(w+3,_x+24);
+  /*Second pass: rows of w become columns of y.
+    Each row of w has at most four non-zero entries.*/
+  for(i=0;i<8;i++)idct8_4(_y+i,w+(i<<3));
+  /*Remove the accumulated scale factor, rounding to nearest.*/
+  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)((_y[i]+8)>>4);
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  This is the general version: no coefficient is assumed to be 0.
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients.*/
+static void oc_idct8x8_slow(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  ogg_int16_t w[64];
+  int         i;
+  /*First pass: rows of x become columns of w.
+    All of x is consumed before anything is written to y, which is what
+     allows the two buffers to be the same.*/
+  for(i=0;i<8;i++)idct8(w+i,_x+(i<<3));
+  /*Second pass: rows of w become columns of y.*/
+  for(i=0;i<8;i++)idct8(_y+i,w+(i<<3));
+  /*Remove the accumulated scale factor, rounding to nearest.*/
+  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)((_y[i]+8)>>4);
+}
+
+/*Performs an inverse 8x8 DCT by dispatching to the idct8x8 implementation
+   stored in the state's opt_vtable.
+  _state:    The state whose function table selects the implementation.
+  _y:        The coefficient buffer, passed through unchanged.
+  _last_zzi: Passed through unchanged; see oc_idct8x8_c() for its meaning.*/
+void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],
+ int _last_zzi){
+  (*_state->opt_vtable.idct8x8)(_y,_last_zzi);
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  _y:        The 64 coefficients to transform.
+             They are overwritten in place with the result.
+  _last_zzi: The zig-zag index reached before the final token of the block
+              was decoded (see below).
+             It selects how many coefficients the transform has to consider.*/
+void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi){
+  /*_last_zzi is subtly different from an actual count of the number of
+     coefficients we decoded for this block.
+    It contains the value of zzi BEFORE the final token in the block was
+     decoded.
+    In most cases this is an EOB token (the continuation of an EOB run from a
+     previous block counts), and so this is the same as the coefficient count.
+    However, in the case that the last token was NOT an EOB token, but filled
+     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
+    Provided the last token was not a pure zero run, the minimum value it can
+     be is 46, and so that doesn't affect any of the cases in this routine.
+    However, if the last token WAS a pure zero run of length 63, then _last_zzi
+     will be 1 while the number of coefficients decoded is 64.
+    Thus, we will trigger the following special case, where the real
+     coefficient count would not.
+    Note also that a zero run of length 64 will give _last_zzi a value of 0,
+     but we still process the DC coefficient, which might have a non-zero value
+     due to DC prediction.
+    Although convoluted, this is arguably the correct behavior: it allows us to
+     use a smaller transform when the block ends with a long zero run instead
+     of a normal EOB token.
+    It could be smarter... multiple separate zero runs at the end of a block
+     will fool it, but an encoder that generates these really deserves what it
+     gets.
+    Needless to say we inherited this approach from VP3.*/
+  /*Perform the iDCT in place, using the cheapest variant whose assumed
+     pattern of zero coefficients holds for this block.*/
+  if(_last_zzi<3)oc_idct8x8_3(_y,_y);
+  else if(_last_zzi<10)oc_idct8x8_10(_y,_y);
+  else oc_idct8x8_slow(_y,_y);
+}

Copied: trunk/theora/lib/info.c (from rev 16442, trunk/theora/lib/dec/info.c)
===================================================================
--- trunk/theora/lib/info.c	                        (rev 0)
+++ trunk/theora/lib/info.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,131 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include "internal.h"
+
+
+
+/*This is more or less the same as strncasecmp, but that doesn't exist
+   everywhere, and this is a fairly trivial function, so we include it.
+  Note: We take advantage of the fact that we know _n is less than or equal to
+   the length of at least one of the strings.
+  _s1: The comment string, expected to be of the form "TAG=value".
+  _s2: The tag name to look for.
+  _n:  The number of characters of the tag name to compare.
+  Return: 0 if the first _n characters match, ignoring case, and are
+   immediately followed by '=' in _s1, or a non-zero value otherwise.*/
+static int oc_tagcompare(const char *_s1,const char *_s2,int _n){
+  int c;
+  for(c=0;c<_n;c++){
+    /*toupper() is only defined for EOF and values representable as an
+       unsigned char.
+      Where plain char is signed, bytes >=0x80 (e.g., from UTF-8 text) would
+       otherwise be passed as negative values, so convert them first.*/
+    if(toupper((unsigned char)_s1[c])!=toupper((unsigned char)_s2[c])){
+      return !0;
+    }
+  }
+  /*The tag must be followed by '=' in the comment; otherwise it is only a
+     prefix of a longer tag name.*/
+  return _s1[c]!='=';
+}
+
+
+
+/*Initializes a th_info structure.
+  The whole structure is zeroed first; then the version fields are set from
+   the TH_VERSION_* constants and keyframe_granule_shift is set to 6.
+  _info: The structure to initialize.*/
+void th_info_init(th_info *_info){
+  memset(_info,0,sizeof(*_info));
+  _info->version_major=TH_VERSION_MAJOR;
+  _info->version_minor=TH_VERSION_MINOR;
+  _info->version_subminor=TH_VERSION_SUB;
+  _info->keyframe_granule_shift=6;
+}
+
+/*Clears a th_info structure by zeroing every byte of it.
+  Nothing is freed here.
+  _info: The structure to clear.*/
+void th_info_clear(th_info *_info){
+  memset(_info,0,sizeof(*_info));
+}
+
+
+
+/*Initializes a th_comment structure by zeroing every byte of it, which
+   leaves it with no comments.
+  _tc: The structure to initialize.*/
+void th_comment_init(th_comment *_tc){
+  memset(_tc,0,sizeof(*_tc));
+}
+
+/*Appends a copy of a comment string to a th_comment structure.
+  Both arrays are grown to hold the new entry plus one spare slot, and the
+   slot after the last comment in user_comments is set to NULL.
+  If any allocation fails, the function returns without adding the comment;
+   the comments already stored are left intact.
+  _tc:      The structure to add the comment to.
+  _comment: The NUL-terminated string to copy.*/
+void th_comment_add(th_comment *_tc,char *_comment){
+  char **strs;
+  int   *lens;
+  char  *copy;
+  int    len;
+  /*Grow the pointer array first, and store it immediately so that the
+     structure stays valid even if a later allocation fails.*/
+  strs=_ogg_realloc(_tc->user_comments,
+   (_tc->comments+2)*sizeof(*_tc->user_comments));
+  if(!strs)return;
+  _tc->user_comments=strs;
+  /*Then grow the length array the same way.*/
+  lens=_ogg_realloc(_tc->comment_lengths,
+   (_tc->comments+2)*sizeof(*_tc->comment_lengths));
+  if(!lens)return;
+  _tc->comment_lengths=lens;
+  /*Record the length and duplicate the string, terminator included.*/
+  len=strlen(_comment);
+  lens[_tc->comments]=len;
+  copy=_ogg_malloc(len+1);
+  strs[_tc->comments]=copy;
+  if(!copy)return;
+  memcpy(copy,_comment,len+1);
+  /*Only now does the new comment count; keep the list NULL-terminated.*/
+  _tc->comments++;
+  strs[_tc->comments]=NULL;
+}
+
+/*Builds a "tag=value" string and appends it to a th_comment structure.
+  The string is assembled in a temporary buffer that is released again once
+   th_comment_add() has made its own copy.
+  If the temporary buffer cannot be allocated, nothing is added.
+  _tc:  The structure to add the comment to.
+  _tag: The NUL-terminated tag name.
+  _val: The NUL-terminated value.*/
+void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val){
+  char *buf;
+  char *dst;
+  int   ntag;
+  int   nval;
+  ntag=strlen(_tag);
+  nval=strlen(_val);
+  /*Room for the tag, the '=', the value, and the terminating '\0'.*/
+  buf=_ogg_malloc(ntag+nval+2);
+  if(!buf)return;
+  dst=buf;
+  memcpy(dst,_tag,ntag);
+  dst+=ntag;
+  *dst++='=';
+  /*Copying nval+1 bytes brings the terminator of _val along.*/
+  memcpy(dst,_val,nval+1);
+  th_comment_add(_tc,buf);
+  _ogg_free(buf);
+}
+
+/*Looks up the value of a tag in a th_comment structure.
+  Tag names are compared without regard to case.
+  _tc:    The structure to search.
+  _tag:   The NUL-terminated tag name to look for.
+  _count: Which match to return, counting from 0.
+  Return: A pointer to the text following "tag=" in the matching comment, or
+   NULL if there are not more than _count matches.
+  The pointer refers to the stored data itself, not to a copy.*/
+char *th_comment_query(th_comment *_tc,char *_tag,int _count){
+  long ci;
+  int  nmatches;
+  int  ntag;
+  ntag=strlen(_tag);
+  nmatches=0;
+  for(ci=0;ci<_tc->comments;ci++){
+    /*Skip comments that carry a different tag.*/
+    if(oc_tagcompare(_tc->user_comments[ci],_tag,ntag))continue;
+    /*Step over the tag name and the '=' to reach the value.*/
+    if(nmatches==_count)return _tc->user_comments[ci]+ntag+1;
+    nmatches++;
+  }
+  /*Ran out of comments before reaching the requested match.*/
+  return NULL;
+}
+
+/*Counts how many comments in a th_comment structure carry a given tag.
+  Tag names are compared without regard to case.
+  _tc:  The structure to search.
+  _tag: The NUL-terminated tag name to look for.
+  Return: The number of comments whose tag matches _tag.*/
+int th_comment_query_count(th_comment *_tc,char *_tag){
+  long ci;
+  int  nmatches;
+  int  ntag;
+  ntag=strlen(_tag);
+  nmatches=0;
+  for(ci=0;ci<_tc->comments;ci++){
+    /*oc_tagcompare() returns 0 on a match.*/
+    if(oc_tagcompare(_tc->user_comments[ci],_tag,ntag)==0)nmatches++;
+  }
+  return nmatches;
+}
+
+/*Releases everything owned by a th_comment structure and zeroes it.
+  Each stored comment string, both arrays, and the vendor string are freed.
+  _tc: The structure to clear.
+       Passing NULL is allowed and does nothing.*/
+void th_comment_clear(th_comment *_tc){
+  long ci;
+  if(_tc==NULL)return;
+  /*Free the individual strings before the array that holds them.*/
+  for(ci=0;ci<_tc->comments;ci++)_ogg_free(_tc->user_comments[ci]);
+  _ogg_free(_tc->user_comments);
+  _ogg_free(_tc->comment_lengths);
+  _ogg_free(_tc->vendor);
+  /*Leave no dangling pointers or stale counts behind.*/
+  memset(_tc,0,sizeof(*_tc));
+}

Copied: trunk/theora/lib/internal.c (from rev 16442, trunk/theora/lib/dec/internal.c)
===================================================================
--- trunk/theora/lib/internal.c	                        (rev 0)
+++ trunk/theora/lib/internal.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,262 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "internal.h"
+
+
+
+/*A map from the index in the zig zag scan to the coefficient number in a
+   block.
+  All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs
+   past the end of a block in bogus streams get mapped to a known location.*/
+const unsigned char OC_FZIG_ZAG[128]={
+   0, 1, 8,16, 9, 2, 3,10,
+  17,24,32,25,18,11, 4, 5,
+  12,19,26,33,40,48,41,34,
+  27,20,13, 6, 7,14,21,28,
+  35,42,49,56,57,50,43,36,
+  29,22,15,23,30,37,44,51,
+  58,59,52,45,38,31,39,46,
+  53,60,61,54,47,55,62,63,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64
+};
+
+/*A map from the coefficient number in a block to its index in the zig zag
+   scan.*/
+const unsigned char OC_IZIG_ZAG[64]={
+   0, 1, 5, 6,14,15,27,28,
+   2, 4, 7,13,16,26,29,42,
+   3, 8,12,17,25,30,41,43,
+   9,11,18,24,31,40,44,53,
+  10,19,23,32,39,45,52,54,
+  20,22,33,38,46,51,55,60,
+  21,34,37,47,50,56,59,61,
+  35,36,48,49,57,58,62,63
+};
+
+/*A map from physical macro block ordering to bitstream macro block
+   ordering within a super block.*/
+const unsigned char OC_MB_MAP[2][2]={{0,3},{1,2}};
+
+/*A list of the indices in the oc_mb.map array that can be valid for each of
+   the various chroma decimation types.*/
+const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={
+  {0,1,2,3,4,8},
+  {0,1,2,3,4,5,8,9},
+  {0,1,2,3,4,6,8,10},
+  {0,1,2,3,4,5,6,7,8,9,10,11}
+};
+
+/*The number of indices in the oc_mb.map array that can be valid for each of
+   the various chroma decimation types.*/
+const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12};
+
+/*The number of extra bits that are coded with each of the DCT tokens.
+  Each DCT token has some fixed number of additional bits (possibly 0) stored
+   after the token itself, containing, for example, coefficient magnitude,
+   sign bits, etc.*/
+const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={
+  0,0,0,2,3,4,12,3,6,
+  0,0,0,0,
+  1,1,1,1,2,3,4,5,6,10,
+  1,1,1,1,1,3,4,
+  2,3
+};
+
+
+
+/*Returns the number of right shifts needed to reduce _v to zero.*/
+int oc_ilog(unsigned _v){
+  int nbits;
+  for(nbits=0;_v!=0;_v>>=1)nbits++;
+  return nbits;
+}
+
+
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the X and Y directions
+   (4:2:0).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[1][0]+_lbmvs[2][0]+_lbmvs[3][0];
+  dy=_lbmvs[0][1]+_lbmvs[1][1]+_lbmvs[2][1]+_lbmvs[3][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,2,2);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,2,2);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the Y direction.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[2][0];
+  dy=_lbmvs[0][1]+_lbmvs[2][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+  dx=_lbmvs[1][0]+_lbmvs[3][0];
+  dy=_lbmvs[1][1]+_lbmvs[3][1];
+  _cbmvs[1][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[1][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the X direction (4:2:2).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[1][0];
+  dy=_lbmvs[0][1]+_lbmvs[1][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+  dx=_lbmvs[2][0]+_lbmvs[3][0];
+  dy=_lbmvs[2][1]+_lbmvs[3][1];
+  _cbmvs[2][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[2][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with no chroma decimation (4:4:4).
+  Each chroma block simply receives a copy of the matching luma vector.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+           All four entries are written.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  memcpy(_cbmvs,_lbmvs,4*sizeof(_lbmvs[0]));
+}
+
+/*A table of functions used to fill in the chroma plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.*/
+const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
+};
+
+
+
+/*Allocates a _height by _width array of _sz-byte elements in a single block:
+   a table of _height row pointers followed by the row data they point to.
+  Return: The row pointer table (release it with oc_free_2d()), or NULL if the
+           size computation would overflow or the allocation fails.*/
+void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz){
+  size_t  rowsz;
+  size_t  colsz;
+  size_t  datsz;
+  size_t  i;
+  char   *ret;
+  /*Reject sizes whose products or sum would wrap around in a size_t.*/
+  if(_width>0&&_sz>(~(size_t)0)/_width)return NULL;
+  rowsz=_sz*_width;
+  if(_height>0&&rowsz>(~(size_t)0)/_height)return NULL;
+  datsz=rowsz*_height;
+  if(_height>((~(size_t)0)-datsz)/sizeof(void *))return NULL;
+  colsz=_height*sizeof(void *);
+  ret=(char *)_ogg_malloc(datsz+colsz);
+  if(ret==NULL)return NULL;
+  for(i=0;i<_height;i++)((void **)ret)[i]=(void *)(ret+colsz+i*rowsz);
+  return (void **)ret;
+}
+
+/*Allocates, via _ogg_calloc(), a _height by _width array of _sz-byte elements
+   in a single block: a table of _height row pointers followed by the row data.
+  Return: The row pointer table (release it with oc_free_2d()), or NULL if the
+           size computation would overflow or the allocation fails.*/
+void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz){
+  size_t  colsz;
+  size_t  rowsz;
+  size_t  datsz;
+  size_t  i;
+  char   *ret;
+  /*Reject sizes whose products or sum would wrap around in a size_t.*/
+  if(_width>0&&_sz>(~(size_t)0)/_width)return NULL;
+  rowsz=_sz*_width;
+  if(_height>0&&rowsz>(~(size_t)0)/_height)return NULL;
+  datsz=rowsz*_height;
+  if(_height>((~(size_t)0)-datsz)/sizeof(void *))return NULL;
+  colsz=_height*sizeof(void *);
+  ret=(char *)_ogg_calloc(datsz+colsz,1);
+  if(ret==NULL)return NULL;
+  for(i=0;i<_height;i++)((void **)ret)[i]=(void *)(ret+colsz+i*rowsz);
+  return (void **)ret;
+}
+
+void oc_free_2d(void *_ptr){
+  _ogg_free(_ptr);
+}
+
+/*Fills in a Y'CbCr buffer with a pointer to the image data in the first
+   buffer, but with the opposite vertical orientation.
+  _dst: The destination buffer.
+        This can be the same as _src.
+  _src: The source buffer.*/
+void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
+ const th_ycbcr_buffer _src){
+  int pli;
+  for(pli=0;pli<3;pli++){
+    _dst[pli].width=_src[pli].width;
+    _dst[pli].height=_src[pli].height;
+    _dst[pli].stride=-_src[pli].stride;
+    _dst[pli].data=_src[pli].data
+     +(1-_dst[pli].height)*(ptrdiff_t)_dst[pli].stride;
+  }
+}
+
+const char *th_version_string(void){
+  return OC_VENDOR_STRING;
+}
+
+ogg_uint32_t th_version_number(void){
+  return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+TH_VERSION_SUB;
+}
+
+/*Determines the packet type; a 0-byte packet counts as a video data packet.
+  Return: 1 for a header packet, 0 for a data packet.*/
+int th_packet_isheader(ogg_packet *_op){
+  if(_op->bytes<=0)return 0;
+  return _op->packet[0]>>7;
+}
+
+/*Determines the frame type of a video data packet.
+  A 0-byte packet is treated as a delta frame.
+  Return: 1 for a key frame, 0 for a delta frame, -1 for a header packet.*/
+int th_packet_iskeyframe(ogg_packet *_op){
+  if(_op->bytes<=0)return 0;
+  return _op->packet[0]&0x80?-1:!(_op->packet[0]&0x40);
+}

Modified: trunk/theora/lib/internal.h
===================================================================
--- trunk/theora/lib/internal.h	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/lib/internal.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -37,9 +37,9 @@
 #  endif
 # endif
 
-# include "dec/ocintrin.h"
-# include "dec/huffman.h"
-# include "dec/quant.h"
+# include "ocintrin.h"
+# include "huffman.h"
+# include "quant.h"
 
 /*Some assembly constructs require aligned operands.*/
 # if defined(OC_X86_ASM)

Copied: trunk/theora/lib/mathops.c (from rev 16442, trunk/theora/lib/enc/mathops.c)
===================================================================
--- trunk/theora/lib/mathops.c	                        (rev 0)
+++ trunk/theora/lib/mathops.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,296 @@
+#include "mathops.h"
+#include <limits.h>
+
+/*The fastest fallback strategy for platforms with fast multiplication appears
+   to be based on de Bruijn sequences~\cite{LP98}.
+  Tests confirmed this to be true even on an ARM11, where it is actually faster
+   than using the native clz instruction.
+  Define OC_ILOG_NODEBRUIJN to use a simpler fallback on platforms where
+   multiplication or table lookups are too expensive; the lookup table below
+   is then compiled out, since none of the remaining code paths read it.
+  @UNPUBLISHED{LP98,
+    author="Charles E. Leiserson and Harald Prokop",
+    title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word",
+    month=Jun,
+    year=1998,
+    note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}"
+  }*/
+#if !defined(OC_ILOG_NODEBRUIJN)&& \
+ (!defined(OC_CLZ32)||(!defined(OC_CLZ64)&&LONG_MAX<9223372036854775807LL))
+static const unsigned char OC_DEBRUIJN_IDX32[32]={
+   0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8,
+  31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9
+};
+#endif
+
+int oc_ilog32(ogg_uint32_t _v){
+#if defined(OC_CLZ32)
+  return (OC_CLZ32_OFFS-OC_CLZ32(_v))&-!!_v;
+#else
+/*On a Pentium M, this branchless version tested as the fastest version without
+   multiplications on 1,000,000,000 random 32-bit integers, edging out a
+   similar version with branches, and a 256-entry LUT version.*/
+# if defined(OC_ILOG_NODEBRUIJN)
+  int ret;
+  int m;
+  ret=_v>0;
+  m=(_v>0xFFFFU)<<4;
+  _v>>=m;
+  ret|=m;
+  m=(_v>0xFFU)<<3;
+  _v>>=m;
+  ret|=m;
+  m=(_v>0xFU)<<2;
+  _v>>=m;
+  ret|=m;
+  m=(_v>3)<<1;
+  _v>>=m;
+  ret|=m;
+  ret+=_v>1;
+  return ret;
+/*This de Bruijn sequence version is faster if you have a fast multiplier.*/
+# else
+  int ret;
+  ret=_v>0;
+  _v|=_v>>1;
+  _v|=_v>>2;
+  _v|=_v>>4;
+  _v|=_v>>8;
+  _v|=_v>>16;
+  _v=(_v>>1)+1;
+  ret+=OC_DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F];
+  return ret;
+# endif
+#endif
+}
+
+/*Returns floor(log2(_v))+1 for positive _v, and 0 when _v is 0.*/
+int oc_ilog64(ogg_int64_t _v){
+#if defined(OC_CLZ64)
+  return (OC_CLZ64_OFFS-OC_CLZ64(_v))&-!!_v;
+#else
+# if defined(OC_ILOG_NODEBRUIJN)
+  ogg_uint32_t v;
+  int          ret;
+  int          m;
+  ret=_v>0;
+  m=(_v>0xFFFFFFFFU)<<5;
+  v=(ogg_uint32_t)(_v>>m);
+  ret|=m;
+  m=(v>0xFFFFU)<<4;
+  v>>=m;
+  ret|=m;
+  m=(v>0xFFU)<<3;
+  v>>=m;
+  ret|=m;
+  m=(v>0xFU)<<2;
+  v>>=m;
+  ret|=m;
+  m=(v>3)<<1;
+  v>>=m;
+  ret|=m;
+  ret+=v>1;
+  return ret;
+# else
+/*If we don't have a 64-bit word, split it into two 32-bit halves.*/
+#  if LONG_MAX<9223372036854775807LL
+  ogg_uint32_t v;
+  int          ret;
+  int          m;
+  ret=_v>0;
+  m=(_v>0xFFFFFFFFU)<<5;
+  v=(ogg_uint32_t)(_v>>m);
+  ret|=m;
+  v|=v>>1;
+  v|=v>>2;
+  v|=v>>4;
+  v|=v>>8;
+  v|=v>>16;
+  v=(v>>1)+1;
+  ret+=OC_DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F];
+  return ret;
+/*Otherwise use one 64-bit multiply; it is unsigned so wrapping is defined.*/
+#  else
+  static const unsigned char OC_DEBRUIJN_IDX64[64]={
+     0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
+     5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
+    63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
+    62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
+  };
+  int ret;
+  ret=_v>0;
+  _v|=_v>>1;
+  _v|=_v>>2;
+  _v|=_v>>4;
+  _v|=_v>>8;
+  _v|=_v>>16;
+  _v|=_v>>32;
+  _v=(_v>>1)+1;
+  return ret+OC_DEBRUIJN_IDX64[(unsigned long)_v*0x218A392CD3D5DBFUL>>58&0x3F];
+#  endif
+# endif
+#endif
+}
+
+/*round(2**(62+i)*atanh(2**(-(i+1)))/log(2))*/
+static const ogg_int64_t OC_ATANH_LOG2[32]={
+  0x32B803473F7AD0F4LL,0x2F2A71BD4E25E916LL,0x2E68B244BB93BA06LL,
+  0x2E39FB9198CE62E4LL,0x2E2E683F68565C8FLL,0x2E2B850BE2077FC1LL,
+  0x2E2ACC58FE7B78DBLL,0x2E2A9E2DE52FD5F2LL,0x2E2A92A338D53EECLL,
+  0x2E2A8FC08F5E19B6LL,0x2E2A8F07E51A485ELL,0x2E2A8ED9BA8AF388LL,
+  0x2E2A8ECE2FE7384ALL,0x2E2A8ECB4D3E4B1ALL,0x2E2A8ECA94940FE8LL,
+  0x2E2A8ECA6669811DLL,0x2E2A8ECA5ADEDD6ALL,0x2E2A8ECA57FC347ELL,
+  0x2E2A8ECA57438A43LL,0x2E2A8ECA57155FB4LL,0x2E2A8ECA5709D510LL,
+  0x2E2A8ECA5706F267LL,0x2E2A8ECA570639BDLL,0x2E2A8ECA57060B92LL,
+  0x2E2A8ECA57060008LL,0x2E2A8ECA5705FD25LL,0x2E2A8ECA5705FC6CLL,
+  0x2E2A8ECA5705FC3ELL,0x2E2A8ECA5705FC33LL,0x2E2A8ECA5705FC30LL,
+  0x2E2A8ECA5705FC2FLL,0x2E2A8ECA5705FC2FLL
+};
+
+/*Computes the binary exponential of _z, a log base 2 in Q57 format.*/
+ogg_int64_t oc_bexp64(ogg_int64_t _z){
+  ogg_int64_t w;
+  ogg_int64_t z;
+  int         ipart;
+  ipart=(int)(_z>>57);
+  if(ipart<0)return 0;
+  if(ipart>=63)return 0x7FFFFFFFFFFFFFFFLL;
+  z=_z-OC_Q57(ipart);
+  if(z){
+    ogg_int64_t mask;
+    long        wlo;
+    int         i;
+    /*C doesn't give us 64x64->128 muls, so we use CORDIC.
+      This is not particularly fast, but it's not being used in time-critical
+       code; it is very accurate.*/
+    /*z is the fractional part of the log in Q62 format.
+      We need 1 bit of headroom since the magnitude can get larger than 1
+       during the iteration, and a sign bit.*/
+    z<<=5;
+    /*w is the exponential in Q61 format (since it also needs headroom and can
+       get as large as 2.0); we could get another bit if we dropped the sign,
+       but we'll recover that bit later anyway.
+      Ideally this should start out as
+        \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}}
+       but in order to guarantee convergence we have to repeat iterations 4,
+        13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/
+    w=0x26A3D0E401DD846DLL;
+    for(i=0;;i++){
+      mask=-(z<0);
+      w+=(w>>i+1)+mask^mask;
+      z-=OC_ATANH_LOG2[i]+mask^mask;
+      /*Repeat iteration 4.*/
+      if(i>=3)break;
+      z<<=1;
+    }
+    for(;;i++){
+      mask=-(z<0);
+      w+=(w>>i+1)+mask^mask;
+      z-=OC_ATANH_LOG2[i]+mask^mask;
+      /*Repeat iteration 13.*/
+      if(i>=12)break;
+      z<<=1;
+    }
+    for(;i<32;i++){
+      mask=-(z<0);
+      w+=(w>>i+1)+mask^mask;
+      z=z-(OC_ATANH_LOG2[i]+mask^mask)<<1;
+    }
+    wlo=0;
+    /*Skip the remaining iterations unless we really require that much
+       precision.
+      We could have bailed out earlier for smaller iparts, but that would
+       require initializing w from a table, as the limit doesn't converge to
+       61-bit precision until n=30.*/
+    if(ipart>30){
+      /*For these iterations, we just update the low bits, as the high bits
+         can't possibly be affected.
+        OC_ATANH_LOG2 has also converged (it actually did so one iteration
+         earlier, but that's no reason for an extra special case).*/
+      for(;;i++){
+        mask=-(z<0);
+        wlo+=(w>>i)+mask^mask;
+        z-=OC_ATANH_LOG2[31]+mask^mask;
+        /*Repeat iteration 40.*/
+        if(i>=39)break;
+        z<<=1;
+      }
+      for(;i<61;i++){
+        mask=-(z<0);
+        wlo+=(w>>i)+mask^mask;
+        z=z-(OC_ATANH_LOG2[31]+mask^mask)<<1;
+      }
+    }
+    w=(w<<1)+wlo;
+  }
+  else w=(ogg_int64_t)1<<62;
+  if(ipart<62)w=(w>>61-ipart)+1>>1;
+  return w;
+}
+
+/*Computes the binary logarithm of _w, returned in Q57 format.*/
+ogg_int64_t oc_blog64(ogg_int64_t _w){
+  ogg_int64_t z;
+  int         ipart;
+  if(_w<=0)return -1;
+  ipart=OC_ILOGNZ_64(_w)-1;
+  if(ipart>61)_w>>=ipart-61;
+  else _w<<=61-ipart;
+  z=0;
+  if(_w&_w-1){
+    ogg_int64_t x;
+    ogg_int64_t y;
+    ogg_int64_t u;
+    ogg_int64_t mask;
+    int         i;
+    /*C doesn't give us 64x64->128 muls, so we use CORDIC.
+      This is not particularly fast, but it's not being used in time-critical
+       code; it is very accurate.*/
+    /*z is the fractional part of the log in Q61 format.*/
+    /*x and y are the cosh() and sinh(), respectively, in Q61 format.
+      We are computing z=2*atanh(y/x)=2*atanh((_w-1)/(_w+1)).*/
+    x=_w+((ogg_int64_t)1<<61);
+    y=_w-((ogg_int64_t)1<<61);
+    for(i=0;i<4;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*Repeat iteration 4.*/
+    for(i--;i<13;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*Repeat iteration 13.*/
+    for(i--;i<32;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*OC_ATANH_LOG2 has converged.*/
+    for(;i<40;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[31]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*Repeat iteration 40.*/
+    for(i--;i<62;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[31]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    z=z+8>>4;
+  }
+  return OC_Q57(ipart)+z;
+}

Copied: trunk/theora/lib/mathops.h (from rev 16442, trunk/theora/lib/enc/mathops.h)
===================================================================
--- trunk/theora/lib/mathops.h	                        (rev 0)
+++ trunk/theora/lib/mathops.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,141 @@
+#if !defined(_mathops_H)
+# define _mathops_H (1)
+# include <ogg/ogg.h>
+
+# ifdef __GNUC_PREREQ
+#  if __GNUC_PREREQ(3,4)
+#   include <limits.h>
+/*Note the casts to (int) below: this prevents OC_CLZ{32|64}_OFFS from
+   "upgrading" the type of an entire expression to an (unsigned) size_t.*/
+#   if INT_MAX>=2147483647
+#    define OC_CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
+#    define OC_CLZ32(_x) (__builtin_clz(_x))
+#   elif LONG_MAX>=2147483647L
+#    define OC_CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
+#    define OC_CLZ32(_x) (__builtin_clzl(_x))
+#   endif
+#   if INT_MAX>=9223372036854775807LL
+#    define OC_CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
+#    define OC_CLZ64(_x) (__builtin_clz(_x))
+#   elif LONG_MAX>=9223372036854775807LL
+#    define OC_CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
+#    define OC_CLZ64(_x) (__builtin_clzl(_x))
+#   elif LLONG_MAX>=9223372036854775807LL|| \
+     __LONG_LONG_MAX__>=9223372036854775807LL
+#    define OC_CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT)
+#    define OC_CLZ64(_x) (__builtin_clzll(_x))
+#   endif
+#  endif
+# endif
+
+
+
+/**
+ * oc_ilog32 - Integer binary logarithm of a 32-bit value.
+ * @_v: A 32-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * The OC_ILOG_32() or OC_ILOGNZ_32() macros may be able to use a builtin
+ *  function instead, which should be faster.
+ */
+int oc_ilog32(ogg_uint32_t _v);
+/**
+ * oc_ilog64 - Integer binary logarithm of a 64-bit value.
+ * @_v: A 64-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * The OC_ILOG_64() or OC_ILOGNZ_64() macros may be able to use a builtin
+ *  function instead, which should be faster.
+ */
+int oc_ilog64(ogg_int64_t _v);
+
+
+# if defined(OC_CLZ32)
+/**
+ * OC_ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value.
+ * @_v: A non-zero 32-bit value.
+ * Returns floor(log2(_v))+1.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * If _v is zero, the return value is undefined; use OC_ILOG_32() instead.
+ */
+#  define OC_ILOGNZ_32(_v) (OC_CLZ32_OFFS-OC_CLZ32(_v))
+/**
+ * OC_ILOG_32 - Integer binary logarithm of a 32-bit value.
+ * @_v: A 32-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ */
+#  define OC_ILOG_32(_v)   (OC_ILOGNZ_32(_v)&-!!(_v))
+# else
+#  define OC_ILOGNZ_32(_v) (oc_ilog32(_v))
+#  define OC_ILOG_32(_v)   (oc_ilog32(_v))
+# endif
+
+# if defined(OC_CLZ64)
+/**
+ * OC_ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value.
+ * @_v: A non-zero 64-bit value.
+ * Returns floor(log2(_v))+1.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * If _v is zero, the return value is undefined; use OC_ILOG_64() instead.
+ */
+#  define OC_ILOGNZ_64(_v) (OC_CLZ64_OFFS-OC_CLZ64(_v))
+/**
+ * OC_ILOG_64 - Integer binary logarithm of a 64-bit value.
+ * @_v: A 64-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ */
+#  define OC_ILOG_64(_v)   (OC_ILOGNZ_64(_v)&-!!(_v))
+# else
+#  define OC_ILOGNZ_64(_v) (oc_ilog64(_v))
+#  define OC_ILOG_64(_v)   (oc_ilog64(_v))
+# endif
+
+# define OC_STATIC_ILOG0(_v) (!!(_v))
+# define OC_STATIC_ILOG1(_v) (((_v)&0x2)?2:OC_STATIC_ILOG0(_v))
+# define OC_STATIC_ILOG2(_v) \
+ (((_v)&0xC)?2+OC_STATIC_ILOG1((_v)>>2):OC_STATIC_ILOG1(_v))
+# define OC_STATIC_ILOG3(_v) \
+ (((_v)&0xF0)?4+OC_STATIC_ILOG2((_v)>>4):OC_STATIC_ILOG2(_v))
+# define OC_STATIC_ILOG4(_v) \
+ (((_v)&0xFF00)?8+OC_STATIC_ILOG3((_v)>>8):OC_STATIC_ILOG3(_v))
+# define OC_STATIC_ILOG5(_v) \
+ (((_v)&0xFFFF0000)?16+OC_STATIC_ILOG4((_v)>>16):OC_STATIC_ILOG4(_v))
+# define OC_STATIC_ILOG6(_v) \
+ (((_v)&0xFFFFFFFF00000000ULL)?32+OC_STATIC_ILOG5((_v)>>32):OC_STATIC_ILOG5(_v))
+/**
+ * OC_STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant.
+ * @_v: A non-negative 32-bit constant.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * This macro is suitable for evaluation at compile time, but it should not be
+ *  used on values that can change at runtime, as it operates via exhaustive
+ *  search.
+ */
+# define OC_STATIC_ILOG_32(_v) (OC_STATIC_ILOG5((ogg_uint32_t)(_v)))
+/**
+ * OC_STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant.
+ * @_v: A non-negative 64-bit constant.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * This macro is suitable for evaluation at compile time, but it should not be
+ *  used on values that can change at runtime, as it operates via exhaustive
+ *  search.
+ */
+# define OC_STATIC_ILOG_64(_v) (OC_STATIC_ILOG6((ogg_int64_t)(_v)))
+
+#define OC_Q57(_v) ((ogg_int64_t)(_v)<<57)
+
+ogg_int64_t oc_bexp64(ogg_int64_t _z);
+ogg_int64_t oc_blog64(ogg_int64_t _w);
+
+#endif

Copied: trunk/theora/lib/mcenc.c (from rev 16442, trunk/theora/lib/enc/mcenc.c)
===================================================================
--- trunk/theora/lib/mcenc.c	                        (rev 0)
+++ trunk/theora/lib/mcenc.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,767 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "encint.h"
+
+
+
+typedef struct oc_mcenc_ctx           oc_mcenc_ctx;
+
+
+
+/*Temporary state used for motion estimation.*/
+struct oc_mcenc_ctx{
+  /*The candidate motion vectors.*/
+  int                candidates[13][2];
+  /*The start of the Set B candidates.*/
+  int                setb0;
+  /*The total number of candidates.*/
+  int                ncandidates;
+};
+
+
+
+/*The maximum Y plane SAD value for accepting the median predictor.*/
+#define OC_YSAD_THRESH1            (256)
+/*The amount to right shift the minimum error by when inflating it for
+   computing the second maximum Y plane SAD threshold.*/
+#define OC_YSAD_THRESH2_SCALE_BITS (4)
+/*The amount to add to the second maximum Y plane threshold when inflating
+   it.*/
+#define OC_YSAD_THRESH2_OFFSET     (64)
+
+/*The vector offsets in the X direction for each search site in the square
+   pattern.*/
+static const int OC_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1};
+/*The vector offsets in the Y direction for each search site in the square
+   pattern.*/
+static const int OC_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1};
+/*The number of sites to search for each boundary condition in the square
+   pattern.
+  Bit flags for the boundary conditions are as follows:
+  1: -16==dx
+  2:      dx==15(.5)
+  4: -16==dy
+  8:      dy==15(.5)*/
+static const int OC_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3};
+/*The list of sites to search for each boundary condition in the square
+   pattern.*/
+static const int OC_SQUARE_SITES[11][8]={
+  /* -15.5<dx<15(.5),   -15.5<dy<15(.5)*/
+  {0,1,2,3,5,6,7,8},
+  /*-15.5==dx,          -15.5<dy<15(.5)*/
+  {1,2,5,7,8},
+  /*     dx==15(.5),    -15.5<dy<15(.5)*/
+  {0,1,3,6,7},
+  /*-15.5==dx==15(.5),  -15.5<dy<15(.5)*/
+  {-1},
+  /* -15.5<dx<15(.5),  -15.5==dy*/
+  {3,5,6,7,8},
+  /*-15.5==dx,         -15.5==dy*/
+  {5,7,8},
+  /*     dx==15(.5),   -15.5==dy*/
+  {3,6,7},
+  /*-15.5==dx==15(.5), -15.5==dy*/
+  {-1},
+  /* -15.5<dx<15(.5),         dy==15(.5)*/
+  {0,1,2,3,5},
+  /*-15.5==dx,                dy==15(.5)*/
+  {1,2,5},
+  /*       dx==15(.5),        dy==15(.5)*/
+  {0,1,3}
+};
+
+
+static void oc_mcenc_find_candidates(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
+ int _accum[2],int _mbi,int _frame){
+  oc_mb_enc_info *embs;
+  int             a[3][2];
+  int             ncandidates;
+  unsigned        nmbi;
+  int             i;
+  embs=_enc->mb_info;
+  /*Skip a position to store the median predictor in.*/
+  ncandidates=1;
+  if(embs[_mbi].ncneighbors>0){
+    /*Fill in the first part of set A: the vectors from adjacent blocks.*/
+    for(i=0;i<embs[_mbi].ncneighbors;i++){
+      nmbi=embs[_mbi].cneighbors[i];
+      _mcenc->candidates[ncandidates][0]=embs[nmbi].analysis_mv[0][_frame][0];
+      _mcenc->candidates[ncandidates][1]=embs[nmbi].analysis_mv[0][_frame][1];
+      ncandidates++;
+    }
+  }
+  /*Add a few additional vectors to set A: the vectors used in the previous
+     frames and the (0,0) vector.*/
+  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,_accum[0],31);
+  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,_accum[1],31);
+  ncandidates++;
+  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
+   embs[_mbi].analysis_mv[1][_frame][0]+_accum[0],31);
+  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
+   embs[_mbi].analysis_mv[1][_frame][1]+_accum[1],31);
+  ncandidates++;
+  _mcenc->candidates[ncandidates][0]=0;
+  _mcenc->candidates[ncandidates][1]=0;
+  ncandidates++;
+  /*Use the first three vectors of set A to find our best predictor: their
+     median.*/
+  memcpy(a,_mcenc->candidates+1,sizeof(a));
+  OC_SORT2I(a[0][0],a[1][0]);
+  OC_SORT2I(a[0][1],a[1][1]);
+  OC_SORT2I(a[1][0],a[2][0]);
+  OC_SORT2I(a[1][1],a[2][1]);
+  OC_SORT2I(a[0][0],a[1][0]);
+  OC_SORT2I(a[0][1],a[1][1]);
+  _mcenc->candidates[0][0]=a[1][0];
+  _mcenc->candidates[0][1]=a[1][1];
+  /*Fill in set B: accelerated predictors for this and adjacent macro blocks.*/
+  _mcenc->setb0=ncandidates;
+  /*NOTE(review): nmbi is assigned but never read; each pass uses embs[_mbi].*/
+  nmbi=_mbi;
+  for(i=0;;i++){
+    _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
+     2*embs[_mbi].analysis_mv[1][_frame][0]
+     -embs[_mbi].analysis_mv[2][_frame][0]+_accum[0],31);
+    _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
+     2*embs[_mbi].analysis_mv[1][_frame][1]
+     -embs[_mbi].analysis_mv[2][_frame][1]+_accum[1],31);
+    ncandidates++;
+    if(i>=embs[_mbi].npneighbors)break;
+    nmbi=embs[_mbi].pneighbors[i];
+  }
+  /*Truncate to full-pel positions.*/
+  for(i=0;i<ncandidates;i++){
+    _mcenc->candidates[i][0]=OC_DIV2(_mcenc->candidates[i][0]);
+    _mcenc->candidates[i][1]=OC_DIV2(_mcenc->candidates[i][1]);
+  }
+  _mcenc->ncandidates=ncandidates;
+}
+
+#if 0
+static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
+ int _mvoffset0,int _mvoffset1,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _best_err){
+  unsigned err;
+  int      bi;
+  err=0;
+  for(bi=0;bi<4;bi++){
+    ptrdiff_t frag_offs;
+    frag_offs=_frag_buf_offs[_fragis[bi]];
+    err+=oc_enc_frag_sad2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0,
+     _ref+frag_offs+_mvoffset1,_ystride,_best_err-err);
+  }
+  return err;
+}
+#endif
+
+static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
+ int _mvoffset0,int _mvoffset1,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _best_err){
+  unsigned err;
+  int      bi;
+  err=0;
+  for(bi=0;bi<4;bi++){
+    ptrdiff_t frag_offs;
+    frag_offs=_frag_buf_offs[_fragis[bi]];
+    err+=oc_enc_frag_satd2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0,
+     _ref+frag_offs+_mvoffset1,_ystride,_best_err-err);
+  }
+  return err;
+}
+
+/*Scores one candidate vector with oc_enc_frag_sad() on each of the four
+   fragments listed in _fragis.
+  The reference is displaced by _dx bytes plus _dy rows of _ystride bytes.
+  _block_err: Receives the score of each individual fragment.
+  Return: The sum of the four fragment scores.*/
+static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _block_err[4]){
+  unsigned total;
+  int      disp;
+  int      blk;
+  disp=_dx+_dy*_ystride;
+  total=0;
+  for(blk=0;blk<4;blk++){
+    ptrdiff_t offs;
+    offs=_frag_buf_offs[_fragis[blk]];
+    _block_err[blk]=oc_enc_frag_sad(_enc,_src+offs,_ref+offs+disp,_ystride);
+    total+=_block_err[blk];
+  }
+  return total;
+}
+
+/*Scores one candidate vector with oc_enc_frag_satd_thresh() (threshold
+   UINT_MAX) on each of the four fragments listed in _fragis.
+  The reference is displaced by _dx bytes plus _dy rows of _ystride bytes.
+  Return: The sum of the four fragment scores.*/
+static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+  int total;
+  int disp;
+  int blk;
+  disp=_dx+_dy*_ystride;
+  total=0;
+  blk=0;
+  do{
+    ptrdiff_t offs;
+    offs=_frag_buf_offs[_fragis[blk]];
+    total+=oc_enc_frag_satd_thresh(_enc,
+     _src+offs,_ref+offs+disp,_ystride,UINT_MAX);
+  }
+  while(++blk<4);
+  return total;
+}
+
+/*Scores one candidate vector for a single fragment with
+   oc_enc_frag_satd_thresh() (threshold UINT_MAX).
+  _frag_offs: Offset of the fragment in both _src and _ref.
+  The reference is additionally displaced by _dx bytes plus _dy rows of
+   _ystride bytes.*/
+static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc,
+ ptrdiff_t _frag_offs,int _dx,int _dy,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+  const unsigned char *cur;
+  const unsigned char *pred;
+  cur=_src+_frag_offs;
+  pred=_ref+_frag_offs+_dx+_dy*_ystride;
+  return oc_enc_frag_satd_thresh(_enc,cur,pred,_ystride,UINT_MAX);
+}
+
+/*Perform a motion vector search for this macro block against a single
+   reference frame.
+  As a bonus, individual block motion vectors are computed as well, as much of
+   the work can be shared.
+  The actual motion vector is stored in the appropriate place in the
+   oc_mb_enc_info structure.
+  _enc:      The encoder context.
+  _accum:    Drop frame/golden MV accumulators.
+  _mbi:      The macro block index.
+  _frame:    The frame to search, either OC_FRAME_PREV or OC_FRAME_GOLD.*/
+void oc_mcenc_search_frame(oc_enc_ctx *_enc,int _accum[2],int _mbi,int _frame){
+  /*Note: Traditionally this search is done using a rate-distortion objective
+     function of the form D+lambda*R.
+    However, xiphmont tested this and found it produced a small degradation,
+     while requiring extra computation.
+    This is most likely due to Theora's peculiar MV encoding scheme: MVs are
+     not coded relative to a predictor, and the only truly cheap way to use a
+     MV is in the LAST or LAST2 MB modes, which are not being considered here.
+    Therefore if we use the MV found here, it's only because both LAST and
+     LAST2 performed poorly, and therefore the MB is not likely to be uniform
+     or suffer from the aperture problem.
+    Furthermore we would like to re-use the MV found here for as many MBs as
+     possible, so picking a slightly sub-optimal vector to save a bit or two
+     may cause increased degradation in many blocks to come.
+    We could artificially reduce lambda to compensate, but it's faster to just
+     disable it entirely, and use D (the distortion) as the sole criterion.*/
+  oc_mcenc_ctx         mcenc;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *fragis;
+  const unsigned char *src;
+  const unsigned char *ref;
+  int                  ystride;
+  oc_mb_enc_info      *embs;
+  /*One word per row, indexed by candy+15, with one bit per column, indexed by
+     candx+15.*/
+  ogg_int32_t          hit_cache[31];
+  ogg_int32_t          hitbit;
+  unsigned             best_block_err[4];
+  unsigned             block_err[4];
+  unsigned             best_err;
+  int                  best_vec[2];
+  int                  best_block_vec[4][2];
+  int                  candx;
+  int                  candy;
+  int                  bi;
+  embs=_enc->mb_info;
+  /*Find some candidate motion vectors.*/
+  oc_mcenc_find_candidates(_enc,&mcenc,_accum,_mbi,_frame);
+  /*Clear the cache of locations we've examined.*/
+  memset(hit_cache,0,sizeof(hit_cache));
+  /*Start with the median predictor.*/
+  candx=mcenc.candidates[0][0];
+  candy=mcenc.candidates[0][1];
+  hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15;
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  fragis=_enc->state.mb_maps[_mbi][0];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
+  ystride=_enc->state.ref_ystride[0];
+  /*TODO: customize error function for speed/(quality+size) tradeoff.*/
+  best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+  best_vec[0]=candx;
+  best_vec[1]=candy;
+  /*Per-block bests are only tracked (and only initialized) when searching
+     OC_FRAME_PREV.*/
+  if(_frame==OC_FRAME_PREV){
+    for(bi=0;bi<4;bi++){
+      best_block_err[bi]=block_err[bi];
+      best_block_vec[bi][0]=candx;
+      best_block_vec[bi][1]=candy;
+    }
+  }
+  /*If this predictor fails, move on to set A.*/
+  if(best_err>OC_YSAD_THRESH1){
+    unsigned err;
+    unsigned t2;
+    int      ncs;
+    int      ci;
+    /*Compute the early termination threshold for set A.*/
+    /*It starts from the largest error[_frame] among this macro block and up
+       to three of its cneighbors.*/
+    t2=embs[_mbi].error[_frame];
+    ncs=OC_MINI(3,embs[_mbi].ncneighbors);
+    for(ci=0;ci<ncs;ci++){
+      t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]);
+    }
+    t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET;
+    /*Examine the candidates in set A.*/
+    for(ci=1;ci<mcenc.setb0;ci++){
+      candx=mcenc.candidates[ci][0];
+      candy=mcenc.candidates[ci][1];
+      /*If we've already examined this vector, then we would be using it if it
+         was better than what we are using.*/
+      hitbit=(ogg_int32_t)1<<candx+15;
+      if(hit_cache[candy+15]&hitbit)continue;
+      hit_cache[candy+15]|=hitbit;
+      err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+       frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+      if(err<best_err){
+        best_err=err;
+        best_vec[0]=candx;
+        best_vec[1]=candy;
+      }
+      if(_frame==OC_FRAME_PREV){
+        for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
+          best_block_err[bi]=block_err[bi];
+          best_block_vec[bi][0]=candx;
+          best_block_vec[bi][1]=candy;
+        }
+      }
+    }
+    if(best_err>t2){
+      /*Examine the candidates in set B.*/
+      /*ci continues from where the set A loop stopped (mcenc.setb0).*/
+      for(;ci<mcenc.ncandidates;ci++){
+        candx=mcenc.candidates[ci][0];
+        candy=mcenc.candidates[ci][1];
+        hitbit=(ogg_int32_t)1<<candx+15;
+        if(hit_cache[candy+15]&hitbit)continue;
+        hit_cache[candy+15]|=hitbit;
+        err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+         frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+        if(err<best_err){
+          best_err=err;
+          best_vec[0]=candx;
+          best_vec[1]=candy;
+        }
+        if(_frame==OC_FRAME_PREV){
+          for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
+            best_block_err[bi]=block_err[bi];
+            best_block_vec[bi][0]=candx;
+            best_block_vec[bi][1]=candy;
+          }
+        }
+      }
+      /*Use the same threshold for set B as in set A.*/
+      if(best_err>t2){
+        int best_site;
+        int nsites;
+        int sitei;
+        int site;
+        int b;
+        /*Square pattern search.*/
+        for(;;){
+          /*best_site stays 4 when no site in this round improves on
+             best_err, which ends the search below.*/
+          best_site=4;
+          /*Compose the bit flags for boundary conditions.*/
+          b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1|
+           OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3;
+          nsites=OC_SQUARE_NSITES[b];
+          for(sitei=0;sitei<nsites;sitei++){
+            site=OC_SQUARE_SITES[b][sitei];
+            candx=best_vec[0]+OC_SQUARE_DX[site];
+            candy=best_vec[1]+OC_SQUARE_DY[site];
+            hitbit=(ogg_int32_t)1<<candx+15;
+            if(hit_cache[candy+15]&hitbit)continue;
+            hit_cache[candy+15]|=hitbit;
+            err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+             frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+            if(err<best_err){
+              best_err=err;
+              best_site=site;
+            }
+            if(_frame==OC_FRAME_PREV){
+              for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
+                best_block_err[bi]=block_err[bi];
+                best_block_vec[bi][0]=candx;
+                best_block_vec[bi][1]=candy;
+              }
+            }
+          }
+          if(best_site==4)break;
+          /*best_vec is only moved once per round, after all sites around the
+             old center have been tried.*/
+          best_vec[0]+=OC_SQUARE_DX[best_site];
+          best_vec[1]+=OC_SQUARE_DY[best_site];
+        }
+        /*Final 4-MV search.*/
+        /*Simply use 1/4 of the macro block set A and B threshold as the
+           individual block threshold.*/
+        if(_frame==OC_FRAME_PREV){
+          t2>>=2;
+          for(bi=0;bi<4;bi++){
+            if(best_block_err[bi]>t2){
+              /*Square pattern search.
+                We do this in a slightly interesting manner.
+                We continue to check the SAD of all four blocks in the
+                 macro block.
+                This gives us two things:
+                 1) We can continue to use the hit_cache to avoid duplicate
+                     checks.
+                    Otherwise we could continue to read it, but not write to it
+                     without saving and restoring it for each block.
+                    Note that we could still eliminate a large number of
+                     duplicate checks by taking into account the site we came
+                     from when choosing the site list.
+                    We can still do that to avoid extra hit_cache queries, and
+                     it might even be a speed win.
+                 2) It gives us a slightly better chance of escaping local
+                     minima.
+                    We would not be here if we weren't doing a fairly bad job
+                     in finding a good vector, and checking these vectors can
+                     save us from 100 to several thousand points off our SAD 1
+                     in 15 times.
+                TODO: Is this a good idea?
+                Who knows.
+                It needs more testing.*/
+              for(;;){
+                int bestx;
+                int besty;
+                int bj;
+                bestx=best_block_vec[bi][0];
+                besty=best_block_vec[bi][1];
+                /*Compose the bit flags for boundary conditions.*/
+                b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1|
+                 OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3;
+                nsites=OC_SQUARE_NSITES[b];
+                for(sitei=0;sitei<nsites;sitei++){
+                  site=OC_SQUARE_SITES[b][sitei];
+                  candx=bestx+OC_SQUARE_DX[site];
+                  candy=besty+OC_SQUARE_DY[site];
+                  hitbit=(ogg_int32_t)1<<candx+15;
+                  if(hit_cache[candy+15]&hitbit)continue;
+                  hit_cache[candy+15]|=hitbit;
+                  err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+                   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+                  if(err<best_err){
+                    best_err=err;
+                    best_vec[0]=candx;
+                    best_vec[1]=candy;
+                  }
+                  for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){
+                    best_block_err[bj]=block_err[bj];
+                    best_block_vec[bj][0]=candx;
+                    best_block_vec[bj][1]=candy;
+                  }
+                }
+                /*Stop once a full round leaves this block's best vector
+                   where it started.*/
+                if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){
+                  break;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  /*Record the results: the best SAD, the SATD at the best vector, and the
+     vector itself shifted left by one bit.*/
+  embs[_mbi].error[_frame]=(ogg_uint16_t)best_err;
+  candx=best_vec[0];
+  candy=best_vec[1];
+  embs[_mbi].satd[_frame]=oc_mcenc_ysatd_check_mbcandidate_fullpel(_enc,
+   frag_buf_offs,fragis,candx,candy,src,ref,ystride);
+  embs[_mbi].analysis_mv[0][_frame][0]=(signed char)(candx<<1);
+  embs[_mbi].analysis_mv[0][_frame][1]=(signed char)(candy<<1);
+  /*The per-block SATDs and vectors are only stored for OC_FRAME_PREV.*/
+  if(_frame==OC_FRAME_PREV){
+    for(bi=0;bi<4;bi++){
+      candx=best_block_vec[bi][0];
+      candy=best_block_vec[bi][1];
+      embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_check_bcandidate_fullpel(_enc,
+       frag_buf_offs[fragis[bi]],candx,candy,src,ref,ystride);
+      embs[_mbi].block_mv[bi][0]=(signed char)(candx<<1);
+      embs[_mbi].block_mv[bi][1]=(signed char)(candy<<1);
+    }
+  }
+}
+
+/*Run the motion search for one macro block against both reference frames.
+  The macro block's analysis_mv history is shifted back by one entry, then
+   oc_mcenc_search_frame() is called for OC_FRAME_PREV and for OC_FRAME_GOLD.
+  _enc: The encoder context.
+  _mbi: The macro block index.*/
+void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi){
+  oc_mv2         *mvs;
+  int             accum_p[2];
+  int             accum_g[2];
+  mvs=_enc->mb_info[_mbi].analysis_mv;
+  /*The PREV accumulator starts from the newest PREV MV only if the previous
+     frame was dropped; otherwise it starts at zero.*/
+  if(_enc->prevframe_dropped){
+    accum_p[0]=mvs[0][OC_FRAME_PREV][0];
+    accum_p[1]=mvs[0][OC_FRAME_PREV][1];
+  }
+  else accum_p[1]=accum_p[0]=0;
+  accum_g[0]=mvs[2][OC_FRAME_GOLD][0];
+  accum_g[1]=mvs[2][OC_FRAME_GOLD][1];
+  mvs[0][OC_FRAME_PREV][0]-=mvs[2][OC_FRAME_PREV][0];
+  mvs[0][OC_FRAME_PREV][1]-=mvs[2][OC_FRAME_PREV][1];
+  /*Move the motion vector predictors back a frame.*/
+  /*Entries 0 and 1 become entries 1 and 2; entry 0 is rewritten by the
+     searches below.*/
+  memmove(mvs+1,mvs,2*sizeof(*mvs));
+  /*Search the last frame.*/
+  oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV);
+  mvs[2][OC_FRAME_PREV][0]=accum_p[0];
+  mvs[2][OC_FRAME_PREV][1]=accum_p[1];
+  /*GOLDEN MVs are different from PREV MVs in that they're each absolute
+     offsets from some frame in the past rather than relative offsets from the
+     frame before.
+    For predictor calculation to make sense, we need them to be in the same
+     form as PREV MVs.*/
+  mvs[1][OC_FRAME_GOLD][0]-=mvs[2][OC_FRAME_GOLD][0];
+  mvs[1][OC_FRAME_GOLD][1]-=mvs[2][OC_FRAME_GOLD][1];
+  mvs[2][OC_FRAME_GOLD][0]-=accum_g[0];
+  mvs[2][OC_FRAME_GOLD][1]-=accum_g[1];
+  /*Search the golden frame.*/
+  oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD);
+  /*Put GOLDEN MVs back into absolute offset form.
+    The newest MV is already an absolute offset.*/
+  /*These four statements undo the four subtractions above, in reverse
+     order.*/
+  mvs[2][OC_FRAME_GOLD][0]+=accum_g[0];
+  mvs[2][OC_FRAME_GOLD][1]+=accum_g[1];
+  mvs[1][OC_FRAME_GOLD][0]+=mvs[2][OC_FRAME_GOLD][0];
+  mvs[1][OC_FRAME_GOLD][1]+=mvs[2][OC_FRAME_GOLD][1];
+}
+
+#if 0
+/*Compiled out: SAD-based variant of the half-pel macro block refinement.
+  Tries the 8 sites of OC_SQUARE_SITES[0] around the vector _vec and keeps the
+   one with the lowest oc_sad16_halfpel() score below _best_err.
+  _vec:      On entry, the vector to refine; on exit, that vector shifted left
+              by one bit plus the offset of the chosen site.
+  _best_err: The score to beat.
+  _frame:    The reference frame to refine against.
+  Return: The lowest score found, or _best_err if no site improved on it.*/
+static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi,
+ int _vec[2],int _best_err,int _frame){
+  const unsigned char *src;
+  const unsigned char *ref;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *fragis;
+  int                  offset_y[9];
+  int                  ystride;
+  int                  mvoffset_base;
+  int                  best_site;
+  int                  sitei;
+  int                  err;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  /*The parameter is named _frame; the previous spelling (_framei) named no
+     variable in scope.*/
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  fragis=_enc->state.mb_maps[_mbi][0];
+  ystride=_enc->state.ref_ystride[0];
+  mvoffset_base=_vec[0]+_vec[1]*ystride;
+  /*offset_y[4] is deliberately left unset; NOTE(review): this relies on
+     OC_SQUARE_SITES[0] never listing site 4 -- confirm against its
+     definition.*/
+  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
+  offset_y[3]=offset_y[5]=0;
+  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
+  best_site=4;
+  for(sitei=0;sitei<8;sitei++){
+    int site;
+    int xmask;
+    int ymask;
+    int dx;
+    int dy;
+    int mvoffset0;
+    int mvoffset1;
+    site=OC_SQUARE_SITES[0][sitei];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*The following code SHOULD be equivalent to
+        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
+         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
+      However, it should also be much faster, as it involves no multiplies and
+       doesn't have to handle chroma vectors.*/
+    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
+    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
+    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
+    err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
+     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
+    if(err<_best_err){
+      _best_err=err;
+      best_site=site;
+    }
+  }
+  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
+  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
+  return _best_err;
+}
+#endif
+
+/*Half-pel refinement of a macro block vector.
+  Tries the 8 sites of OC_SQUARE_SITES[0] around the vector _vec and keeps the
+   one with the lowest oc_satd16_halfpel() score below _best_err.
+  _enc:      The encoder context.
+  _mbi:      The macro block index.
+  _vec:      On entry, the vector to refine; on exit, that vector shifted left
+              by one bit plus the offset of the chosen site.
+  _best_err: The score to beat.
+  _frame:    The reference frame to refine against.
+  Return: The lowest score found, or _best_err if no site improved on it.*/
+static unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc,
+ int _mbi,int _vec[2],unsigned _best_err,int _frame){
+  const unsigned char *src;
+  const unsigned char *ref;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *fragis;
+  int                  offset_y[9];
+  int                  ystride;
+  int                  mvoffset_base;
+  int                  best_site;
+  int                  sitei;
+  /*Unsigned, to match both the return type of oc_satd16_halfpel() and
+     _best_err, so the comparison below involves no signed/unsigned mix.*/
+  unsigned             err;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  fragis=_enc->state.mb_maps[_mbi][0];
+  ystride=_enc->state.ref_ystride[0];
+  mvoffset_base=_vec[0]+_vec[1]*ystride;
+  /*offset_y[4] is deliberately left unset; NOTE(review): this relies on
+     OC_SQUARE_SITES[0] never listing site 4 -- confirm against its
+     definition.*/
+  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
+  offset_y[3]=offset_y[5]=0;
+  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
+  /*best_site stays 4 when no site beats _best_err.*/
+  best_site=4;
+  for(sitei=0;sitei<8;sitei++){
+    int site;
+    int xmask;
+    int ymask;
+    int dx;
+    int dy;
+    int mvoffset0;
+    int mvoffset1;
+    site=OC_SQUARE_SITES[0][sitei];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*The following code SHOULD be equivalent to
+        oc_state_get_mv_offsets(&_enc->state,&mvoffsets,0,
+         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy);
+      However, it should also be much faster, as it involves no multiplies and
+       doesn't have to handle chroma vectors.*/
+    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
+    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
+    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
+    err=oc_satd16_halfpel(_enc,frag_buf_offs,fragis,
+     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
+    if(err<_best_err){
+      _best_err=err;
+      best_site=site;
+    }
+  }
+  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
+  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
+  return _best_err;
+}
+
+/*Refine the stored macro block vector for one reference frame.
+  The newest analysis_mv entry for _frame is halved with OC_DIV2, passed
+   through oc_mcenc_ysatd_halfpel_mbrefine(), and written back together with
+   the updated satd[_frame].
+  _enc:   The encoder context.
+  _mbi:   The macro block index.
+  _frame: The reference frame whose vector is refined.*/
+void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){
+  oc_mb_enc_info *mb;
+  int             mv[2];
+  mb=_enc->mb_info+_mbi;
+  mv[0]=OC_DIV2(mb->analysis_mv[0][_frame][0]);
+  mv[1]=OC_DIV2(mb->analysis_mv[0][_frame][1]);
+  mb->satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc,
+   _mbi,mv,mb->satd[_frame],_frame);
+  mb->analysis_mv[0][_frame][0]=(signed char)mv[0];
+  mb->analysis_mv[0][_frame][1]=(signed char)mv[1];
+}
+
+#if 0
+/*Compiled out: SAD-based variant of the half-pel single block refinement.
+  Tries the 8 sites of OC_SQUARE_SITES[0] around the vector _vec and keeps the
+   one with the lowest oc_enc_frag_sad2_thresh() score below _best_err.
+  _vec:      On entry, the vector to refine; on exit, that vector shifted left
+              by one bit plus the offset of the chosen site.
+  _src:      The block in the current frame.
+  _ref:      The co-located block in the reference frame.
+  _ystride:  The stride passed on to oc_enc_frag_sad2_thresh().
+  _offset_y: Per-site vertical offsets, indexed by site.
+  _best_err: The score to beat.
+  Return: The lowest score found, or _best_err if no site improved on it.*/
+static int oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc,
+ int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ int _offset_y[9],unsigned _best_err){
+  int mvoffset_base;
+  int best_site;
+  int sitei;
+  mvoffset_base=_vec[0]+_vec[1]*_ystride;
+  best_site=4;
+  for(sitei=0;sitei<8;sitei++){
+    unsigned err;
+    int      site;
+    int      xmask;
+    int      ymask;
+    int      dx;
+    int      dy;
+    int      mvoffset0;
+    int      mvoffset1;
+    site=OC_SQUARE_SITES[0][sitei];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*The following code SHOULD be equivalent to
+        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
+         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
+      However, it should also be much faster, as it involves no multiplies and
+       doesn't have to handle chroma vectors.*/
+    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
+    mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
+    mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
+    /*The stride parameter is named _ystride; the previous spelling (ystride)
+       named no variable in scope.*/
+    err=oc_enc_frag_sad2_thresh(_enc,_src,
+     _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err);
+    if(err<_best_err){
+      _best_err=err;
+      best_site=site;
+    }
+  }
+  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
+  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
+  return _best_err;
+}
+#endif
+
+/*Half-pel refinement of a single block vector.
+  Tries the 8 sites of OC_SQUARE_SITES[0] around the vector _vec and keeps the
+   one with the lowest oc_enc_frag_satd2_thresh() score below _best_err.
+  _vec:      On entry, the vector to refine; on exit, that vector shifted left
+              by one bit plus the offset of the chosen site.
+  _src:      The block in the current frame.
+  _ref:      The co-located block in the reference frame.
+  _ystride:  The stride passed on to oc_enc_frag_satd2_thresh().
+  _offset_y: Per-site vertical offsets, indexed by site.
+  _best_err: The score to beat.
+  Return: The lowest score found, or _best_err if no site improved on it.*/
+static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc,
+ int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ int _offset_y[9],unsigned _best_err){
+  int base;
+  int winner;
+  int k;
+  base=_vec[0]+_vec[1]*_ystride;
+  /*winner stays 4 when no site beats _best_err.*/
+  winner=4;
+  k=0;
+  while(k<8){
+    unsigned score;
+    int      s;
+    int      sx;
+    int      sy;
+    int      mx;
+    int      my;
+    int      off0;
+    int      off1;
+    s=OC_SQUARE_SITES[0][k];
+    sx=OC_SQUARE_DX[s];
+    sy=OC_SQUARE_DY[s];
+    /*Branch-free selection of the two reference offsets for this site; see
+       oc_state_get_mv_offsets() for the general form this is meant to
+       match.*/
+    mx=OC_SIGNMASK(((_vec[0]<<1)+sx)^sx);
+    my=OC_SIGNMASK(((_vec[1]<<1)+sy)^sy);
+    off0=base+(sx&mx)+(_offset_y[s]&my);
+    off1=base+(sx&~mx)+(_offset_y[s]&~my);
+    score=oc_enc_frag_satd2_thresh(_enc,_src,
+     _ref+off0,_ref+off1,_ystride,_best_err);
+    if(score<_best_err){
+      winner=s;
+      _best_err=score;
+    }
+    k++;
+  }
+  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[winner];
+  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[winner];
+  return _best_err;
+}
+
+/*Refine the four per-block vectors of a macro block against OC_FRAME_PREV.
+  Each block_mv entry is halved with OC_DIV2 and passed through
+   oc_mcenc_ysatd_halfpel_brefine(); the result is stored in ref_mv and the
+   score in block_satd.
+  _enc: The encoder context.
+  _mbi: The macro block index.*/
+void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){
+  oc_mb_enc_info      *mb;
+  const ptrdiff_t     *offs_table;
+  const ptrdiff_t     *mb_frags;
+  const unsigned char *cur;
+  const unsigned char *prev;
+  int                  row_step[9];
+  int                  stride;
+  int                  blk;
+  mb=_enc->mb_info+_mbi;
+  stride=_enc->state.ref_ystride[0];
+  offs_table=_enc->state.frag_buf_offs;
+  mb_frags=_enc->state.mb_maps[_mbi][0];
+  cur=_enc->state.ref_frame_data[OC_FRAME_IO];
+  prev=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+  /*Entry 4 is intentionally never written.*/
+  row_step[0]=-stride;
+  row_step[1]=-stride;
+  row_step[2]=-stride;
+  row_step[3]=0;
+  row_step[5]=0;
+  row_step[6]=stride;
+  row_step[7]=stride;
+  row_step[8]=stride;
+  for(blk=0;blk<4;blk++){
+    ptrdiff_t offs;
+    int       mv[2];
+    offs=offs_table[mb_frags[blk]];
+    mv[0]=OC_DIV2(mb->block_mv[blk][0]);
+    mv[1]=OC_DIV2(mb->block_mv[blk][1]);
+    mb->block_satd[blk]=oc_mcenc_ysatd_halfpel_brefine(_enc,mv,
+     cur+offs,prev+offs,stride,row_step,mb->block_satd[blk]);
+    mb->ref_mv[blk][0]=(signed char)mv[0];
+    mb->ref_mv[blk][1]=(signed char)mv[1];
+  }
+}

Copied: trunk/theora/lib/modedec.h (from rev 16442, trunk/theora/lib/enc/modedec.h)
===================================================================
--- trunk/theora/lib/modedec.h	                        (rev 0)
+++ trunk/theora/lib/modedec.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,4027 @@
+/*File generated by libtheora with OC_COLLECT_METRICS defined at compile time.*/
+#if !defined(_modedec_H)
+# define _modedec_H (1)
+
+
+
+# if defined(OC_COLLECT_METRICS)
+typedef struct oc_mode_metrics oc_mode_metrics;
+# endif
+typedef struct oc_mode_rd      oc_mode_rd;
+
+
+
+/*The number of extra bits of precision at which to store rate metrics.*/
+# define OC_BIT_SCALE  (6)
+/*The number of extra bits of precision at which to store RMSE metrics.
+  This must be at least half OC_BIT_SCALE (rounded up).*/
+# define OC_RMSE_SCALE (5)
+/*The number of bins to partition statistics into.*/
+# define OC_SAD_BINS   (24)
+/*The number of bits of precision to drop from SAD scores to assign them to a
+   bin.*/
+# define OC_SAD_SHIFT  (9)
+
+
+
+# if defined(OC_COLLECT_METRICS)
+/*One record of collected statistics; OC_MODE_METRICS holds one per table
+   bin.
+  Only compiled when OC_COLLECT_METRICS is defined.
+  NOTE(review): the field names suggest a weight (fragw) followed by running
+   sums of SATD, rate and RMSE and of their squares and cross products --
+   confirm against the code that fills this in.*/
+struct oc_mode_metrics{
+  double fragw;
+  double satd;
+  double rate;
+  double rmse;
+  double satd2;
+  double satdrate;
+  double rate2;
+  double satdrmse;
+  double rmse2;
+};
+
+
+int             oc_has_mode_metrics;
+oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS];
+# endif
+
+
+
+/*One entry of the OC_MODE_RD table: a rate value paired with an RMSE value.
+  NOTE(review): presumably stored with OC_BIT_SCALE and OC_RMSE_SCALE extra
+   bits of precision respectively -- confirm against the code that reads the
+   table.*/
+struct oc_mode_rd{
+  ogg_int16_t rate;
+  ogg_int16_t rmse;
+};
+
+
+# if !defined(OC_COLLECT_METRICS)
+static const
+# endif
+oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={
+  {
+    {
+      /*Y'  qi=0  INTRA*/
+      {
+        {   87,  -66},{  132, 1611},{  197, 3474},{  285, 5130},
+        {  376, 6419},{  450, 7545},{  521, 8587},{  600, 9587},
+        {  689,10498},{  790,11348},{  899,12158},{ 1030,12855},
+        { 1166,13459},{ 1276,14052},{ 1353,14732},{ 1444,15425},
+        { 1535,16101},{ 1609,16856},{ 1697,17532},{ 1823,17995},
+        { 1962,18426},{ 2085,18919},{ 2201,19503},{ 2304,20307}
+      },
+      /*Y'  qi=0  INTER*/
+      {
+        {   32, -105},{   40, 1268},{   54, 2919},{   91, 4559},
+        {  118, 6244},{  132, 7932},{  142, 9514},{  149,10989},
+        {  155,12375},{  161,13679},{  168,14958},{  176,16215},
+        {  187,17431},{  196,18623},{  207,19790},{  218,20941},
+        {  230,22083},{  246,23213},{  265,24333},{  292,25439},
+        {  328,26512},{  372,27538},{  427,28522},{  494,29479}
+      }
+    },
+    {
+      /*Cb  qi=0  INTRA*/
+      {
+        {    1,    6},{   27,  368},{   52,  738},{   67, 1171},
+        {   80, 1642},{   99, 2134},{  110, 2642},{  112, 3144},
+        {  126, 3578},{  154, 3967},{  167, 4387},{  172, 4839},
+        {  191, 5278},{  208, 5666},{  220, 6036},{  223, 6398},
+        {  227, 6814},{  253, 7157},{  284, 7403},{  292, 7699},
+        {  314, 7983},{  339, 8203},{  363, 8460},{  399, 8919}
+      },
+      /*Cb  qi=0  INTER*/
+      {
+        {   68,  -55},{   63,  275},{   58,  602},{   53,  936},
+        {   50, 1290},{   54, 1691},{   58, 2116},{   62, 2553},
+        {   67, 2992},{   72, 3422},{   78, 3843},{   84, 4253},
+        {   89, 4658},{   94, 5062},{   98, 5455},{  100, 5848},
+        {  102, 6231},{  104, 6604},{  104, 6982},{  105, 7359},
+        {  105, 7733},{  104, 8104},{  105, 8465},{  111, 8828}
+      }
+    },
+    {
+      /*Cr  qi=0  INTRA*/
+      {
+        {    1,    8},{   23,  375},{   47,  759},{   63, 1220},
+        {   71, 1693},{   82, 2171},{   94, 2652},{  109, 3103},
+        {  125, 3567},{  133, 3995},{  151, 4375},{  168, 4819},
+        {  174, 5244},{  190, 5635},{  215, 6005},{  242, 6347},
+        {  257, 6758},{  280, 7068},{  311, 7336},{  326, 7652},
+        {  346, 7968},{  372, 8213},{  388, 8515},{  408, 9060}
+      },
+      /*Cr  qi=0  INTER*/
+      {
+        {   69,    0},{   60,  314},{   49,  624},{   45,  943},
+        {   45, 1285},{   49, 1691},{   55, 2130},{   62, 2560},
+        {   71, 2973},{   79, 3385},{   85, 3800},{   89, 4207},
+        {   92, 4620},{   95, 5037},{   96, 5436},{   97, 5839},
+        {   98, 6252},{   99, 6653},{   99, 7038},{  103, 7426},
+        {  107, 7810},{  108, 8178},{  107, 8539},{  106, 8937}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=1  INTRA*/
+      {
+        {   81,  -71},{  133, 1610},{  203, 3460},{  296, 5083},
+        {  392, 6342},{  467, 7454},{  541, 8486},{  625, 9466},
+        {  716,10352},{  823,11181},{  940,11961},{ 1074,12643},
+        { 1211,13233},{ 1324,13807},{ 1408,14489},{ 1504,15167},
+        { 1598,15824},{ 1679,16544},{ 1788,17161},{ 1928,17579},
+        { 2070,17991},{ 2202,18456},{ 2324,19021},{ 2425,19894}
+      },
+      /*Y'  qi=1  INTER*/
+      {
+        {   34,    4},{   40, 1307},{   55, 2914},{   93, 4555},
+        {  120, 6243},{  134, 7912},{  144, 9468},{  152,10918},
+        {  158,12275},{  164,13569},{  171,14846},{  180,16098},
+        {  191,17310},{  204,18484},{  216,19636},{  228,20779},
+        {  242,21912},{  261,23036},{  286,24146},{  320,25221},
+        {  363,26265},{  418,27261},{  485,28203},{  551,29148}
+      }
+    },
+    {
+      /*Cb  qi=1  INTRA*/
+      {
+        {    1,    6},{   28,  367},{   52,  738},{   68, 1172},
+        {   86, 1644},{  106, 2135},{  115, 2642},{  119, 3141},
+        {  132, 3569},{  157, 3951},{  172, 4366},{  177, 4819},
+        {  194, 5258},{  211, 5638},{  224, 6006},{  233, 6367},
+        {  236, 6784},{  258, 7121},{  299, 7357},{  319, 7637},
+        {  337, 7921},{  358, 8141},{  381, 8367},{  401, 8768}
+      },
+      /*Cb  qi=1  INTER*/
+      {
+        {   95,  -31},{   81,  295},{   67,  614},{   53,  953},
+        {   48, 1305},{   51, 1700},{   56, 2125},{   61, 2563},
+        {   67, 3008},{   73, 3435},{   79, 3844},{   85, 4251},
+        {   90, 4663},{   95, 5073},{   98, 5458},{  100, 5844},
+        {  101, 6231},{  102, 6606},{  102, 6980},{  103, 7347},
+        {  104, 7726},{  105, 8096},{  105, 8453},{  105, 8789}
+      }
+    },
+    {
+      /*Cr  qi=1  INTRA*/
+      {
+        {    1,    8},{   25,  375},{   50,  759},{   65, 1221},
+        {   74, 1695},{   86, 2172},{  101, 2651},{  117, 3101},
+        {  129, 3561},{  135, 3985},{  153, 4368},{  171, 4807},
+        {  182, 5223},{  202, 5608},{  225, 5964},{  251, 6300},
+        {  271, 6697},{  295, 6978},{  324, 7235},{  348, 7558},
+        {  367, 7877},{  394, 8101},{  413, 8386},{  409, 8945}
+      },
+      /*Cr  qi=1  INTER*/
+      {
+        {   66,   11},{   59,  323},{   51,  631},{   44,  949},
+        {   44, 1292},{   49, 1703},{   56, 2140},{   62, 2566},
+        {   69, 2991},{   77, 3397},{   84, 3799},{   89, 4211},
+        {   93, 4634},{   94, 5049},{   95, 5444},{   96, 5854},
+        {   94, 6260},{   95, 6640},{   96, 7032},{  101, 7423},
+        {  104, 7790},{  105, 8158},{  109, 8527},{  108, 8872}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=2  INTRA*/
+      {
+        {   87,  -72},{  139, 1607},{  213, 3426},{  315, 4992},
+        {  416, 6217},{  495, 7315},{  574, 8317},{  666, 9265},
+        {  763,10124},{  875,10906},{ 1001,11654},{ 1147,12305},
+        { 1289,12865},{ 1407,13424},{ 1503,14076},{ 1610,14724},
+        { 1720,15342},{ 1815,16020},{ 1937,16579},{ 2084,16981},
+        { 2236,17371},{ 2385,17779},{ 2536,18250},{ 2689,18931}
+      },
+      /*Y'  qi=2  INTER*/
+      {
+        {   30,   -2},{   40, 1308},{   57, 2921},{   96, 4567},
+        {  122, 6260},{  136, 7902},{  148, 9418},{  156,10826},
+        {  162,12157},{  169,13448},{  177,14709},{  188,15938},
+        {  200,17133},{  213,18295},{  228,19433},{  245,20564},
+        {  264,21685},{  289,22790},{  323,23876},{  368,24916},
+        {  427,25906},{  499,26837},{  585,27700},{  680,28514}
+      }
+    },
+    {
+      /*Cb  qi=2  INTRA*/
+      {
+        {    1,    6},{   30,  367},{   58,  738},{   77, 1172},
+        {   93, 1645},{  111, 2137},{  123, 2642},{  126, 3133},
+        {  136, 3553},{  162, 3934},{  178, 4352},{  183, 4803},
+        {  199, 5231},{  220, 5596},{  235, 5957},{  245, 6314},
+        {  256, 6718},{  286, 7048},{  320, 7285},{  336, 7568},
+        {  366, 7829},{  387, 8045},{  405, 8261},{  445, 8550}
+      },
+      /*Cb  qi=2  INTER*/
+      {
+        {  115,  -61},{   93,  277},{   71,  609},{   54,  963},
+        {   49, 1329},{   53, 1715},{   58, 2138},{   63, 2583},
+        {   69, 3017},{   75, 3442},{   81, 3857},{   88, 4263},
+        {   93, 4667},{   96, 5065},{  101, 5451},{  101, 5832},
+        {  102, 6213},{  103, 6593},{  103, 6968},{  104, 7336},
+        {  104, 7710},{  105, 8076},{  106, 8440},{  106, 8822}
+      }
+    },
+    {
+      /*Cr  qi=2  INTRA*/
+      {
+        {    1,    8},{   27,  375},{   54,  759},{   70, 1222},
+        {   79, 1696},{   89, 2173},{  106, 2652},{  123, 3098},
+        {  135, 3553},{  143, 3972},{  161, 4348},{  181, 4782},
+        {  194, 5189},{  213, 5565},{  235, 5907},{  266, 6229},
+        {  286, 6618},{  311, 6897},{  339, 7152},{  362, 7454},
+        {  392, 7721},{  416, 7946},{  429, 8227},{  458, 8540}
+      },
+      /*Cr  qi=2  INTER*/
+      {
+        {   74,   20},{   63,  330},{   51,  635},{   44,  942},
+        {   47, 1287},{   54, 1710},{   59, 2147},{   65, 2571},
+        {   72, 2996},{   79, 3413},{   86, 3820},{   91, 4230},
+        {   93, 4642},{   95, 5046},{   95, 5442},{   95, 5839},
+        {   96, 6243},{   97, 6641},{   99, 7021},{  101, 7396},
+        {  103, 7764},{  106, 8138},{  109, 8507},{  114, 8851}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=3  INTRA*/
+      {
+        {   91,  -67},{  141, 1606},{  219, 3405},{  328, 4929},
+        {  433, 6122},{  515, 7209},{  598, 8204},{  693, 9145},
+        {  796, 9986},{  912,10756},{ 1045,11471},{ 1200,12079},
+        { 1345,12640},{ 1471,13179},{ 1571,13809},{ 1678,14450},
+        { 1798,15047},{ 1905,15701},{ 2043,16205},{ 2202,16569},
+        { 2351,16971},{ 2501,17393},{ 2660,17851},{ 2825,18455}
+      },
+      /*Y'  qi=3  INTER*/
+      {
+        {   53, -164},{   38, 1314},{   59, 2917},{   99, 4563},
+        {  124, 6253},{  139, 7882},{  150, 9375},{  159,10749},
+        {  166,12059},{  173,13349},{  183,14608},{  194,15826},
+        {  208,17003},{  223,18150},{  240,19287},{  259,20411},
+        {  284,21508},{  317,22593},{  359,23656},{  414,24671},
+        {  483,25634},{  569,26519},{  670,27332},{  786,28072}
+      }
+    },
+    {
+      /*Cb  qi=3  INTRA*/
+      {
+        {    1,    5},{   31,  367},{   58,  739},{   78, 1173},
+        {   96, 1645},{  113, 2134},{  125, 2638},{  133, 3127},
+        {  148, 3542},{  171, 3915},{  184, 4328},{  192, 4776},
+        {  209, 5197},{  230, 5556},{  245, 5909},{  252, 6261},
+        {  272, 6641},{  304, 6942},{  330, 7184},{  342, 7477},
+        {  380, 7736},{  404, 7962},{  428, 8151},{  469, 8430}
+      },
+      /*Cb  qi=3  INTER*/
+      {
+        {   86,  -29},{   72,  296},{   58,  618},{   46,  964},
+        {   47, 1338},{   51, 1743},{   56, 2158},{   63, 2594},
+        {   69, 3035},{   77, 3455},{   84, 3859},{   89, 4266},
+        {   94, 4673},{   98, 5074},{  101, 5460},{  101, 5842},
+        {  101, 6217},{  101, 6593},{  102, 6964},{  104, 7325},
+        {  103, 7696},{  103, 8056},{  104, 8430},{  103, 8792}
+      }
+    },
+    {
+      /*Cr  qi=3  INTRA*/
+      {
+        {    1,    8},{   27,  374},{   56,  759},{   74, 1221},
+        {   83, 1696},{   96, 2173},{  113, 2650},{  127, 3091},
+        {  140, 3542},{  151, 3960},{  164, 4334},{  188, 4764},
+        {  208, 5144},{  224, 5493},{  250, 5841},{  278, 6162},
+        {  298, 6548},{  334, 6816},{  365, 7045},{  388, 7343},
+        {  419, 7613},{  443, 7836},{  455, 8105},{  484, 8445}
+      },
+      /*Cr  qi=3  INTER*/
+      {
+        {   76,   26},{   65,  332},{   53,  638},{   45,  945},
+        {   45, 1304},{   53, 1725},{   60, 2153},{   68, 2584},
+        {   74, 3007},{   81, 3425},{   87, 3844},{   91, 4253},
+        {   94, 4657},{   95, 5061},{   94, 5462},{   94, 5856},
+        {   95, 6250},{   96, 6635},{   97, 7014},{  101, 7393},
+        {  104, 7761},{  106, 8137},{  109, 8506},{  111, 8823}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=4  INTRA*/
+      {
+        {   80,  -67},{  143, 1603},{  227, 3378},{  344, 4861},
+        {  454, 6026},{  537, 7104},{  626, 8089},{  725, 9006},
+        {  830, 9827},{  950,10581},{ 1089,11270},{ 1257,11826},
+        { 1409,12366},{ 1535,12912},{ 1640,13528},{ 1753,14173},
+        { 1884,14756},{ 2007,15368},{ 2148,15852},{ 2307,16212},
+        { 2464,16591},{ 2614,17019},{ 2785,17455},{ 2970,17963}
+      },
+      /*Y'  qi=4  INTER*/
+      {
+        {   50, -145},{   38, 1324},{   61, 2921},{  102, 4566},
+        {  127, 6248},{  142, 7845},{  154, 9300},{  163,10656},
+        {  169,11965},{  177,13246},{  188,14495},{  202,15702},
+        {  218,16864},{  236,18003},{  256,19124},{  278,20233},
+        {  307,21330},{  347,22398},{  398,23437},{  463,24429},
+        {  546,25343},{  649,26170},{  767,26935},{  888,27674}
+      }
+    },
+    {
+      /*Cb  qi=4  INTRA*/
+      {
+        {    1,    5},{   33,  367},{   61,  739},{   80, 1173},
+        {   98, 1646},{  114, 2136},{  126, 2639},{  137, 3124},
+        {  152, 3535},{  176, 3903},{  194, 4307},{  206, 4753},
+        {  222, 5165},{  242, 5508},{  260, 5857},{  272, 6205},
+        {  294, 6559},{  332, 6848},{  356, 7104},{  364, 7389},
+        {  396, 7637},{  415, 7878},{  446, 8064},{  506, 8294}
+      },
+      /*Cb  qi=4  INTER*/
+      {
+        {   86,  -15},{   73,  308},{   60,  627},{   46,  967},
+        {   47, 1343},{   51, 1754},{   56, 2183},{   63, 2615},
+        {   70, 3044},{   79, 3459},{   85, 3866},{   90, 4276},
+        {   94, 4686},{   97, 5088},{  100, 5467},{  102, 5837},
+        {  102, 6205},{  101, 6569},{  103, 6939},{  104, 7317},
+        {  105, 7690},{  107, 8043},{  107, 8394},{  111, 8736}
+      }
+    },
+    {
+      /*Cr  qi=4  INTRA*/
+      {
+        {    1,    7},{   28,  375},{   57,  759},{   79, 1221},
+        {   92, 1697},{  105, 2174},{  122, 2648},{  135, 3085},
+        {  146, 3530},{  157, 3947},{  171, 4316},{  195, 4737},
+        {  218, 5117},{  239, 5445},{  268, 5767},{  295, 6074},
+        {  315, 6460},{  355, 6735},{  392, 6933},{  418, 7218},
+        {  448, 7495},{  471, 7688},{  481, 7954},{  504, 8313}
+      },
+      /*Cr  qi=4  INTER*/
+      {
+        {   68,   28},{   57,  334},{   47,  639},{   43,  953},
+        {   48, 1314},{   54, 1736},{   59, 2169},{   69, 2592},
+        {   78, 3017},{   84, 3434},{   88, 3850},{   92, 4260},
+        {   95, 4663},{   96, 5068},{   95, 5455},{   95, 5839},
+        {   96, 6243},{   97, 6626},{   98, 7006},{  101, 7390},
+        {  104, 7755},{  108, 8115},{  111, 8471},{  110, 8825}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=5  INTRA*/
+      {
+        {   84,  -69},{  147, 1599},{  237, 3350},{  360, 4796},
+        {  475, 5934},{  562, 6992},{  657, 7953},{  765, 8837},
+        {  874, 9641},{  998,10384},{ 1146,11047},{ 1322,11572},
+        { 1484,12076},{ 1617,12609},{ 1731,13203},{ 1856,13806},
+        { 1995,14367},{ 2132,14936},{ 2289,15386},{ 2460,15721},
+        { 2635,16066},{ 2802,16442},{ 2980,16805},{ 3177,17272}
+      },
+      /*Y'  qi=5  INTER*/
+      {
+        {   38,  -86},{   37, 1349},{   64, 2920},{  105, 4563},
+        {  129, 6236},{  145, 7809},{  158, 9236},{  167,10572},
+        {  174,11871},{  182,13141},{  195,14368},{  212,15558},
+        {  230,16706},{  250,17828},{  274,18944},{  303,20041},
+        {  342,21116},{  394,22152},{  460,23144},{  543,24073},
+        {  648,24919},{  773,25673},{  922,26323},{ 1084,26924}
+      }
+    },
+    {
+      /*Cb  qi=5  INTRA*/
+      {
+        {    1,    5},{   34,  367},{   63,  739},{   82, 1174},
+        {  102, 1647},{  119, 2137},{  134, 2639},{  145, 3121},
+        {  161, 3529},{  189, 3891},{  207, 4290},{  216, 4721},
+        {  232, 5113},{  258, 5455},{  277, 5798},{  294, 6124},
+        {  322, 6427},{  352, 6697},{  370, 6982},{  384, 7283},
+        {  423, 7529},{  448, 7766},{  478, 7943},{  527, 8151}
+      },
+      /*Cb  qi=5  INTER*/
+      {
+        {   83,  -49},{   69,  284},{   55,  611},{   48,  961},
+        {   49, 1355},{   52, 1769},{   58, 2191},{   65, 2616},
+        {   73, 3041},{   80, 3460},{   87, 3868},{   92, 4276},
+        {   95, 4682},{   98, 5077},{  100, 5459},{  102, 5827},
+        {  102, 6200},{  102, 6568},{  103, 6930},{  103, 7303},
+        {  104, 7672},{  106, 8032},{  106, 8391},{  106, 8727}
+      }
+    },
+    {
+      /*Cr  qi=5  INTRA*/
+      {
+        {    1,    8},{   28,  375},{   57,  760},{   81, 1222},
+        {   99, 1696},{  111, 2175},{  125, 2648},{  140, 3079},
+        {  152, 3520},{  162, 3927},{  179, 4294},{  203, 4714},
+        {  225, 5080},{  254, 5389},{  286, 5703},{  318, 5997},
+        {  342, 6364},{  380, 6640},{  416, 6837},{  445, 7103},
+        {  473, 7370},{  497, 7562},{  514, 7811},{  549, 8148}
+      },
+      /*Cr  qi=5  INTER*/
+      {
+        {   60,    6},{   54,  323},{   46,  638},{   43,  958},
+        {   45, 1329},{   54, 1749},{   61, 2175},{   70, 2600},
+        {   79, 3021},{   85, 3437},{   89, 3847},{   93, 4254},
+        {   95, 4660},{   96, 5065},{   95, 5456},{   95, 5849},
+        {   96, 6243},{   96, 6621},{   97, 6996},{  101, 7366},
+        {  104, 7722},{  107, 8088},{  111, 8448},{  119, 8816}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=6  INTRA*/
+      {
+        {   88,  -69},{  151, 1593},{  251, 3294},{  387, 4681},
+        {  507, 5790},{  601, 6837},{  702, 7787},{  813, 8648},
+        {  927, 9427},{ 1059,10152},{ 1213,10787},{ 1399,11284},
+        { 1568,11781},{ 1705,12312},{ 1823,12890},{ 1957,13482},
+        { 2106,14036},{ 2249,14600},{ 2411,15042},{ 2588,15359},
+        { 2772,15699},{ 2947,16062},{ 3127,16429},{ 3320,16849}
+      },
+      /*Y'  qi=6  INTER*/
+      {
+        {   44,  -80},{   36, 1346},{   69, 2919},{  111, 4563},
+        {  136, 6216},{  154, 7746},{  168, 9139},{  178,10461},
+        {  185,11747},{  195,13007},{  211,14229},{  230,15408},
+        {  250,16547},{  274,17663},{  302,18769},{  339,19851},
+        {  386,20907},{  446,21933},{  527,22884},{  631,23746},
+        {  760,24512},{  914,25178},{ 1087,25758},{ 1278,26262}
+      }
+    },
+    {
+      /*Cb  qi=6  INTRA*/
+      {
+        {    1,    4},{   36,  367},{   66,  739},{   84, 1174},
+        {  105, 1648},{  126, 2139},{  140, 2639},{  149, 3116},
+        {  164, 3523},{  194, 3880},{  217, 4271},{  226, 4694},
+        {  243, 5077},{  270, 5407},{  291, 5742},{  310, 6061},
+        {  340, 6340},{  373, 6609},{  394, 6890},{  409, 7189},
+        {  444, 7434},{  469, 7652},{  499, 7853},{  559, 8135}
+      },
+      /*Cb  qi=6  INTER*/
+      {
+        {   68,  -46},{   60,  291},{   50,  623},{   49,  971},
+        {   50, 1357},{   55, 1781},{   61, 2211},{   69, 2634},
+        {   78, 3052},{   86, 3466},{   91, 3882},{   95, 4292},
+        {   98, 4691},{  101, 5080},{  102, 5458},{  103, 5830},
+        {  103, 6192},{  104, 6554},{  104, 6916},{  106, 7278},
+        {  108, 7641},{  110, 8004},{  112, 8371},{  112, 8758}
+      }
+    },
+    {
+      /*Cr  qi=6  INTRA*/
+      {
+        {    1,    8},{   29,  375},{   59,  760},{   84, 1223},
+        {   99, 1698},{  112, 2176},{  129, 2647},{  143, 3076},
+        {  156, 3510},{  168, 3906},{  189, 4269},{  220, 4682},
+        {  241, 5047},{  266, 5342},{  299, 5649},{  331, 5954},
+        {  357, 6309},{  393, 6579},{  431, 6765},{  467, 6997},
+        {  501, 7276},{  520, 7488},{  525, 7749},{  548, 8146}
+      },
+      /*Cr  qi=6  INTER*/
+      {
+        {   94,   31},{   69,  335},{   47,  641},{   43,  967},
+        {   50, 1350},{   57, 1772},{   65, 2197},{   74, 2625},
+        {   83, 3043},{   90, 3454},{   94, 3867},{   97, 4273},
+        {   98, 4671},{   99, 5068},{   99, 5461},{   98, 5857},
+        {   98, 6245},{   99, 6610},{  103, 6975},{  105, 7345},
+        {  108, 7712},{  111, 8073},{  113, 8415},{  119, 8768}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=7  INTRA*/
+      {
+        {   92,  -70},{  156, 1590},{  261, 3267},{  403, 4618},
+        {  529, 5704},{  628, 6730},{  736, 7657},{  856, 8491},
+        {  978, 9246},{ 1118, 9943},{ 1281,10550},{ 1472,11028},
+        { 1645,11507},{ 1793,12008},{ 1924,12565},{ 2067,13130},
+        { 2229,13638},{ 2388,14160},{ 2558,14584},{ 2744,14886},
+        { 2932,15194},{ 3116,15531},{ 3311,15858},{ 3538,16197}
+      },
+      /*Y'  qi=7  INTER*/
+      {
+        {   43,   -8},{   36, 1351},{   71, 2923},{  112, 4568},
+        {  138, 6201},{  157, 7705},{  171, 9083},{  181,10390},
+        {  189,11664},{  202,12910},{  220,14121},{  241,15281},
+        {  266,16401},{  295,17507},{  328,18608},{  371,19677},
+        {  430,20701},{  508,21676},{  604,22588},{  727,23397},
+        {  878,24093},{ 1055,24690},{ 1263,25151},{ 1496,25504}
+      }
+    },
+    {
+      /*Cb  qi=7  INTRA*/
+      {
+        {    1,    5},{   40,  367},{   72,  740},{   89, 1175},
+        {  108, 1649},{  129, 2140},{  143, 2637},{  154, 3110},
+        {  169, 3507},{  198, 3860},{  224, 4237},{  235, 4652},
+        {  253, 5037},{  282, 5358},{  307, 5674},{  329, 5986},
+        {  361, 6273},{  393, 6527},{  419, 6777},{  435, 7078},
+        {  467, 7342},{  495, 7554},{  529, 7757},{  591, 8053}
+      },
+      /*Cb  qi=7  INTER*/
+      {
+        {   79,  -33},{   68,  299},{   56,  627},{   50,  978},
+        {   51, 1366},{   55, 1786},{   61, 2213},{   70, 2642},
+        {   80, 3062},{   87, 3474},{   92, 3886},{   96, 4292},
+        {   99, 4684},{  102, 5072},{  103, 5450},{  104, 5814},
+        {  104, 6176},{  104, 6538},{  107, 6905},{  110, 7270},
+        {  110, 7625},{  110, 7978},{  111, 8340},{  117, 8674}
+      }
+    },
+    {
+      /*Cr  qi=7  INTRA*/
+      {
+        {    2,    7},{   31,  375},{   62,  760},{   87, 1223},
+        {  103, 1698},{  115, 2175},{  131, 2644},{  147, 3066},
+        {  161, 3494},{  175, 3889},{  199, 4250},{  229, 4653},
+        {  250, 5001},{  279, 5275},{  311, 5577},{  343, 5889},
+        {  376, 6227},{  417, 6486},{  457, 6689},{  484, 6925},
+        {  518, 7174},{  544, 7393},{  549, 7662},{  577, 8050}
+      },
+      /*Cr  qi=7  INTER*/
+      {
+        {   89,   22},{   62,  332},{   45,  641},{   47,  976},
+        {   52, 1363},{   59, 1779},{   67, 2203},{   76, 2628},
+        {   84, 3046},{   90, 3460},{   94, 3875},{   98, 4272},
+        {   99, 4666},{   98, 5063},{   98, 5459},{   98, 5849},
+        {   99, 6226},{  101, 6594},{  104, 6957},{  109, 7324},
+        {  109, 7686},{  111, 8042},{  115, 8379},{  119, 8699}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=8  INTRA*/
+      {
+        {   91,  -69},{  160, 1585},{  274, 3226},{  423, 4538},
+        {  557, 5596},{  664, 6595},{  778, 7506},{  905, 8319},
+        { 1038, 9035},{ 1186, 9701},{ 1355,10292},{ 1554,10754},
+        { 1739,11196},{ 1904,11639},{ 2047,12184},{ 2194,12763},
+        { 2361,13256},{ 2529,13753},{ 2709,14155},{ 2902,14433},
+        { 3100,14723},{ 3292,15026},{ 3489,15327},{ 3714,15705}
+      },
+      /*Y'  qi=8  INTER*/
+      {
+        {   32, -157},{   33, 1346},{   74, 2914},{  116, 4554},
+        {  142, 6172},{  162, 7648},{  177, 9004},{  186,10300},
+        {  196,11570},{  210,12808},{  231,14001},{  256,15150},
+        {  285,16259},{  319,17352},{  359,18435},{  415,19475},
+        {  489,20470},{  584,21400},{  703,22246},{  852,22968},
+        { 1038,23556},{ 1253,24032},{ 1503,24367},{ 1778,24628}
+      }
+    },
+    {
+      /*Cb  qi=8  INTRA*/
+      {
+        {    1,    4},{   42,  367},{   75,  740},{   93, 1176},
+        {  111, 1649},{  128, 2139},{  144, 2635},{  157, 3103},
+        {  174, 3494},{  206, 3844},{  233, 4207},{  251, 4605},
+        {  277, 4980},{  304, 5284},{  335, 5584},{  359, 5888},
+        {  393, 6152},{  432, 6398},{  455, 6656},{  471, 6956},
+        {  502, 7193},{  528, 7405},{  562, 7630},{  603, 7922}
+      },
+      /*Cb  qi=8  INTER*/
+      {
+        {   77,  -37},{   68,  299},{   58,  632},{   50,  991},
+        {   50, 1382},{   55, 1799},{   62, 2226},{   73, 2647},
+        {   82, 3066},{   90, 3480},{   94, 3891},{   96, 4296},
+        {   98, 4687},{  101, 5073},{  103, 5456},{  104, 5817},
+        {  105, 6170},{  106, 6523},{  107, 6886},{  108, 7250},
+        {  109, 7600},{  110, 7955},{  111, 8305},{  112, 8641}
+      }
+    },
+    {
+      /*Cr  qi=8  INTRA*/
+      {
+        {    2,    7},{   33,  375},{   64,  760},{   92, 1224},
+        {  111, 1700},{  122, 2173},{  137, 2637},{  156, 3055},
+        {  172, 3476},{  186, 3856},{  211, 4211},{  242, 4597},
+        {  263, 4939},{  292, 5214},{  335, 5489},{  376, 5772},
+        {  406, 6099},{  440, 6378},{  483, 6578},{  517, 6797},
+        {  550, 7049},{  571, 7283},{  583, 7560},{  618, 7967}
+      },
+      /*Cr  qi=8  INTER*/
+      {
+        {   74,   25},{   58,  328},{   43,  637},{   45,  980},
+        {   51, 1371},{   59, 1788},{   69, 2207},{   79, 2630},
+        {   86, 3051},{   91, 3470},{   95, 3880},{   97, 4280},
+        {   98, 4680},{   97, 5074},{   96, 5456},{   97, 5839},
+        {   99, 6219},{  101, 6583},{  103, 6945},{  106, 7312},
+        {  110, 7671},{  114, 8009},{  115, 8345},{  117, 8686}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=9  INTRA*/
+      {
+        {  104,  -68},{  164, 1580},{  288, 3173},{  448, 4439},
+        {  587, 5485},{  702, 6465},{  824, 7351},{  958, 8148},
+        { 1096, 8845},{ 1253, 9480},{ 1432,10047},{ 1640,10494},
+        { 1835,10926},{ 2015,11350},{ 2166,11871},{ 2321,12428},
+        { 2508,12876},{ 2684,13345},{ 2866,13741},{ 3069,13991},
+        { 3281,14243},{ 3487,14518},{ 3689,14813},{ 3911,15175}
+      },
+      /*Y'  qi=9  INTER*/
+      {
+        {   47, -140},{   34, 1348},{   77, 2915},{  119, 4552},
+        {  145, 6150},{  166, 7600},{  182, 8936},{  192,10221},
+        {  203,11482},{  220,12711},{  244,13886},{  274,15012},
+        {  308,16111},{  349,17190},{  401,18244},{  470,19257},
+        {  561,20209},{  680,21069},{  830,21822},{ 1010,22463},
+        { 1227,22971},{ 1482,23328},{ 1769,23544},{ 2077,23655}
+      }
+    },
+    {
+      /*Cb  qi=9  INTRA*/
+      {
+        {    1,    5},{   43,  367},{   76,  740},{   95, 1176},
+        {  114, 1649},{  135, 2138},{  153, 2629},{  165, 3091},
+        {  184, 3481},{  217, 3831},{  244, 4187},{  260, 4572},
+        {  290, 4930},{  320, 5231},{  351, 5521},{  379, 5812},
+        {  414, 6055},{  452, 6307},{  483, 6564},{  502, 6848},
+        {  525, 7115},{  554, 7321},{  589, 7533},{  626, 7833}
+      },
+      /*Cb  qi=9  INTER*/
+      {
+        {  101,  -43},{   81,  298},{   62,  637},{   49,  989},
+        {   51, 1381},{   56, 1806},{   65, 2231},{   74, 2653},
+        {   84, 3071},{   91, 3482},{   95, 3892},{   97, 4293},
+        {   99, 4684},{  101, 5066},{  103, 5437},{  103, 5793},
+        {  103, 6148},{  104, 6511},{  105, 6867},{  107, 7221},
+        {  110, 7572},{  111, 7926},{  112, 8283},{  116, 8625}
+      }
+    },
+    {
+      /*Cr  qi=9  INTRA*/
+      {
+        {    2,    7},{   35,  375},{   66,  761},{   93, 1224},
+        {  112, 1700},{  126, 2173},{  144, 2633},{  165, 3047},
+        {  183, 3458},{  199, 3835},{  224, 4191},{  257, 4558},
+        {  283, 4887},{  309, 5176},{  351, 5446},{  397, 5713},
+        {  433, 6017},{  469, 6283},{  508, 6480},{  546, 6687},
+        {  579, 6945},{  600, 7182},{  610, 7434},{  623, 7793}
+      },
+      /*Cr  qi=9  INTER*/
+      {
+        {   77,   15},{   57,  330},{   45,  640},{   48,  980},
+        {   54, 1380},{   61, 1802},{   70, 2220},{   80, 2639},
+        {   87, 3057},{   92, 3474},{   94, 3882},{   98, 4282},
+        {   98, 4675},{   97, 5062},{   97, 5450},{   98, 5829},
+        {  100, 6197},{  101, 6561},{  104, 6927},{  107, 7289},
+        {  113, 7638},{  117, 7978},{  119, 8311},{  117, 8629}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=10  INTRA*/
+      {
+        {  101,  -69},{  168, 1574},{  299, 3143},{  465, 4386},
+        {  610, 5410},{  736, 6353},{  866, 7207},{ 1006, 7982},
+        { 1153, 8655},{ 1319, 9261},{ 1504, 9812},{ 1719,10248},
+        { 1928,10653},{ 2116,11056},{ 2282,11550},{ 2458,12070},
+        { 2654,12492},{ 2846,12923},{ 3043,13291},{ 3249,13537},
+        { 3466,13764},{ 3682,13999},{ 3896,14268},{ 4145,14548}
+      },
+      /*Y'  qi=10  INTER*/
+      {
+        {   48,  -94},{   34, 1355},{   81, 2920},{  124, 4545},
+        {  151, 6113},{  174, 7532},{  190, 8850},{  201,10125},
+        {  214,11379},{  235,12591},{  264,13745},{  299,14859},
+        {  338,15948},{  388,17008},{  456,18029},{  546,18988},
+        {  661,19877},{  808,20666},{  993,21321},{ 1218,21835},
+        { 1481,22203},{ 1783,22420},{ 2117,22504},{ 2469,22481}
+      }
+    },
+    {
+      /*Cb  qi=10  INTRA*/
+      {
+        {    2,    4},{   44,  367},{   79,  740},{   99, 1178},
+        {  117, 1652},{  137, 2141},{  156, 2630},{  170, 3089},
+        {  192, 3474},{  227, 3813},{  259, 4157},{  282, 4526},
+        {  310, 4860},{  342, 5140},{  377, 5425},{  400, 5714},
+        {  436, 5952},{  475, 6194},{  496, 6468},{  522, 6748},
+        {  559, 6996},{  587, 7216},{  617, 7433},{  673, 7678}
+      },
+      /*Cb  qi=10  INTER*/
+      {
+        {   87,  -37},{   72,  301},{   58,  636},{   49,  995},
+        {   51, 1394},{   57, 1819},{   66, 2241},{   78, 2660},
+        {   87, 3074},{   93, 3482},{   97, 3891},{   99, 4294},
+        {  101, 4678},{  103, 5050},{  105, 5414},{  106, 5773},
+        {  107, 6134},{  108, 6485},{  110, 6832},{  113, 7187},
+        {  113, 7547},{  114, 7887},{  117, 8230},{  112, 8590}
+      }
+    },
+    {
+      /*Cr  qi=10  INTRA*/
+      {
+        {    2,    7},{   38,  375},{   69,  761},{   96, 1224},
+        {  116, 1701},{  131, 2175},{  148, 2634},{  168, 3041},
+        {  190, 3439},{  211, 3802},{  238, 4151},{  271, 4506},
+        {  297, 4824},{  331, 5103},{  373, 5360},{  415, 5632},
+        {  459, 5928},{  500, 6176},{  535, 6386},{  573, 6586},
+        {  608, 6834},{  629, 7079},{  642, 7337},{  686, 7680}
+      },
+      /*Cr  qi=10  INTER*/
+      {
+        {   81,   34},{   63,  333},{   50,  633},{   48,  987},
+        {   53, 1397},{   61, 1820},{   71, 2237},{   83, 2651},
+        {   91, 3065},{   95, 3479},{   98, 3882},{  100, 4279},
+        {  101, 4673},{  101, 5054},{  100, 5429},{  101, 5801},
+        {  102, 6173},{  104, 6541},{  108, 6904},{  110, 7264},
+        {  114, 7609},{  119, 7945},{  123, 8275},{  128, 8615}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=11  INTRA*/
+      {
+        {  110,  -66},{  176, 1564},{  316, 3087},{  492, 4296},
+        {  645, 5299},{  781, 6217},{  924, 7039},{ 1075, 7776},
+        { 1232, 8421},{ 1410, 9005},{ 1607, 9532},{ 1834, 9929},
+        { 2053,10300},{ 2249,10697},{ 2427,11184},{ 2619,11682},
+        { 2826,12083},{ 3019,12508},{ 3225,12869},{ 3452,13064},
+        { 3670,13280},{ 3890,13519},{ 4123,13750},{ 4367,14059}
+      },
+      /*Y'  qi=11  INTER*/
+      {
+        {   72, -115},{   32, 1354},{   83, 2911},{  126, 4534},
+        {  154, 6080},{  178, 7475},{  194, 8779},{  205,10047},
+        {  222,11290},{  246,12488},{  281,13621},{  322,14714},
+        {  372,15786},{  436,16821},{  519,17813},{  628,18728},
+        {  770,19549},{  950,20254},{ 1175,20800},{ 1443,21197},
+        { 1752,21446},{ 2095,21555},{ 2457,21553},{ 2808,21544}
+      }
+    },
+    {
+      /*Cb  qi=11  INTRA*/
+      {
+        {    2,    4},{   45,  367},{   81,  740},{  101, 1177},
+        {  121, 1650},{  142, 2136},{  159, 2621},{  174, 3075},
+        {  199, 3451},{  234, 3778},{  265, 4117},{  297, 4473},
+        {  333, 4789},{  367, 5054},{  402, 5319},{  427, 5613},
+        {  462, 5871},{  503, 6107},{  532, 6336},{  560, 6584},
+        {  601, 6842},{  631, 7092},{  662, 7292},{  721, 7497}
+      },
+      /*Cb  qi=11  INTER*/
+      {
+        {  117,  -24},{   93,  308},{   69,  638},{   52,  993},
+        {   52, 1395},{   58, 1822},{   68, 2246},{   80, 2665},
+        {   89, 3082},{   94, 3492},{   96, 3900},{   98, 4299},
+        {  101, 4679},{  103, 5047},{  104, 5405},{  106, 5763},
+        {  106, 6120},{  107, 6474},{  109, 6823},{  112, 7163},
+        {  115, 7516},{  117, 7868},{  118, 8213},{  119, 8561}
+      }
+    },
+    {
+      /*Cr  qi=11  INTRA*/
+      {
+        {    2,    7},{   40,  375},{   75,  761},{  100, 1224},
+        {  119, 1700},{  137, 2169},{  154, 2622},{  178, 3025},
+        {  198, 3416},{  220, 3770},{  255, 4114},{  294, 4459},
+        {  323, 4756},{  359, 5028},{  399, 5292},{  438, 5556},
+        {  483, 5827},{  518, 6073},{  551, 6298},{  598, 6501},
+        {  634, 6754},{  652, 6997},{  670, 7211},{  689, 7560}
+      },
+      /*Cr  qi=11  INTER*/
+      {
+        {   75,   30},{   61,  334},{   51,  639},{   49,  995},
+        {   53, 1403},{   62, 1821},{   73, 2237},{   84, 2654},
+        {   91, 3070},{   95, 3485},{   96, 3890},{   98, 4287},
+        {   98, 4672},{   99, 5050},{   99, 5427},{  100, 5798},
+        {  103, 6169},{  105, 6528},{  107, 6881},{  113, 7233},
+        {  118, 7580},{  121, 7916},{  125, 8240},{  130, 8551}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=12  INTRA*/
+      {
+        {  104,  -69},{  182, 1557},{  335, 3040},{  521, 4205},
+        {  684, 5178},{  831, 6068},{  986, 6854},{ 1151, 7559},
+        { 1323, 8169},{ 1523, 8704},{ 1736, 9192},{ 1978, 9558},
+        { 2213, 9908},{ 2421,10298},{ 2613,10757},{ 2822,11208},
+        { 3042,11585},{ 3250,11991},{ 3474,12308},{ 3710,12480},
+        { 3939,12687},{ 4174,12902},{ 4416,13102},{ 4672,13369}
+      },
+      /*Y'  qi=12  INTER*/
+      {
+        {   52,  -91},{   34, 1355},{   86, 2911},{  129, 4518},
+        {  159, 6037},{  184, 7405},{  200, 8694},{  213, 9955},
+        {  232,11185},{  263,12360},{  304,13479},{  354,14555},
+        {  415,15601},{  495,16608},{  601,17549},{  738,18400},
+        {  915,19136},{ 1139,19724},{ 1414,20150},{ 1731,20412},
+        { 2090,20520},{ 2473,20509},{ 2851,20442},{ 3227,20328}
+      }
+    },
+    {
+      /*Cb  qi=12  INTRA*/
+      {
+        {    1,    4},{   46,  367},{   85,  740},{  109, 1178},
+        {  126, 1650},{  145, 2134},{  165, 2617},{  182, 3061},
+        {  209, 3428},{  245, 3749},{  281, 4077},{  316, 4417},
+        {  354, 4718},{  392, 4970},{  430, 5217},{  456, 5501},
+        {  490, 5771},{  534, 5996},{  571, 6207},{  600, 6458},
+        {  644, 6697},{  675, 6942},{  707, 7151},{  766, 7342}
+      },
+      /*Cb  qi=12  INTER*/
+      {
+        {   84,  -24},{   73,  311},{   60,  644},{   52,  998},
+        {   53, 1398},{   60, 1825},{   71, 2249},{   83, 2665},
+        {   90, 3081},{   94, 3490},{   97, 3893},{   99, 4286},
+        {  102, 4663},{  104, 5032},{  105, 5393},{  106, 5751},
+        {  107, 6102},{  108, 6445},{  111, 6788},{  113, 7136},
+        {  114, 7483},{  117, 7828},{  121, 8163},{  122, 8496}
+      }
+    },
+    {
+      /*Cr  qi=12  INTRA*/
+      {
+        {    3,    7},{   41,  375},{   78,  761},{  106, 1225},
+        {  124, 1700},{  140, 2167},{  163, 2616},{  188, 3010},
+        {  213, 3385},{  240, 3718},{  271, 4062},{  309, 4406},
+        {  345, 4691},{  387, 4956},{  430, 5212},{  469, 5467},
+        {  513, 5729},{  554, 5970},{  587, 6176},{  633, 6395},
+        {  673, 6659},{  692, 6868},{  712, 7061},{  758, 7259}
+      },
+      /*Cr  qi=12  INTER*/
+      {
+        {   73,   31},{   59,  335},{   48,  638},{   50,  998},
+        {   56, 1410},{   65, 1827},{   75, 2240},{   85, 2657},
+        {   92, 3073},{   95, 3485},{   97, 3888},{   99, 4279},
+        {   98, 4663},{   99, 5042},{  101, 5412},{  102, 5779},
+        {  105, 6142},{  107, 6498},{  108, 6848},{  113, 7198},
+        {  118, 7540},{  121, 7867},{  127, 8188},{  132, 8508}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=13  INTRA*/
+      {
+        {  109,  -68},{  187, 1551},{  347, 3010},{  541, 4153},
+        {  709, 5107},{  864, 5975},{ 1026, 6745},{ 1194, 7433},
+        { 1375, 8021},{ 1581, 8550},{ 1803, 9026},{ 2054, 9371},
+        { 2301, 9713},{ 2522,10082},{ 2728,10515},{ 2949,10956},
+        { 3184,11297},{ 3408,11653},{ 3643,11946},{ 3886,12100},
+        { 4124,12277},{ 4377,12459},{ 4632,12635},{ 4898,12861}
+      },
+      /*Y'  qi=13  INTER*/
+      {
+        {   48,  -78},{   35, 1357},{   89, 2914},{  133, 4512},
+        {  164, 6004},{  190, 7348},{  207, 8627},{  222, 9881},
+        {  247,11096},{  284,12251},{  333,13350},{  392,14407},
+        {  466,15426},{  565,16391},{  696,17279},{  865,18058},
+        { 1085,18689},{ 1358,19156},{ 1684,19456},{ 2050,19605},
+        { 2447,19614},{ 2855,19524},{ 3243,19398},{ 3611,19201}
+      }
+    },
+    {
+      /*Cb  qi=13  INTRA*/
+      {
+        {    2,    4},{   47,  367},{   86,  741},{  108, 1179},
+        {  127, 1651},{  150, 2133},{  173, 2611},{  194, 3050},
+        {  222, 3417},{  262, 3733},{  303, 4048},{  337, 4375},
+        {  378, 4657},{  420, 4897},{  456, 5148},{  486, 5422},
+        {  518, 5682},{  558, 5903},{  592, 6113},{  623, 6372},
+        {  662, 6628},{  700, 6833},{  751, 6989},{  805, 7147}
+      },
+      /*Cb  qi=13  INTER*/
+      {
+        {   94,  -34},{   78,  303},{   60,  638},{   51,  994},
+        {   54, 1406},{   61, 1836},{   73, 2253},{   84, 2668},
+        {   92, 3082},{   96, 3492},{   99, 3894},{  101, 4284},
+        {  103, 4659},{  105, 5023},{  106, 5376},{  108, 5726},
+        {  109, 6070},{  110, 6418},{  113, 6765},{  117, 7105},
+        {  119, 7448},{  122, 7784},{  126, 8119},{  131, 8463}
+      }
+    },
+    {
+      /*Cr  qi=13  INTRA*/
+      {
+        {    3,    7},{   43,  375},{   80,  762},{  110, 1226},
+        {  131, 1701},{  149, 2166},{  172, 2610},{  196, 2999},
+        {  221, 3359},{  254, 3679},{  292, 4005},{  332, 4329},
+        {  369, 4612},{  408, 4880},{  456, 5139},{  500, 5388},
+        {  544, 5631},{  581, 5877},{  615, 6101},{  660, 6316},
+        {  692, 6594},{  714, 6795},{  736, 6997},{  789, 7290}
+      },
+      /*Cr  qi=13  INTER*/
+      {
+        {   73,   28},{   61,  336},{   46,  642},{   50, 1003},
+        {   58, 1414},{   67, 1832},{   79, 2245},{   87, 2660},
+        {   93, 3075},{   97, 3484},{   99, 3888},{  100, 4277},
+        {  100, 4651},{  100, 5027},{  101, 5403},{  102, 5765},
+        {  105, 6116},{  109, 6470},{  113, 6825},{  119, 7163},
+        {  124, 7497},{  127, 7827},{  131, 8137},{  135, 8437}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=14  INTRA*/
+      {
+        {  113,  -68},{  191, 1545},{  358, 2981},{  559, 4104},
+        {  733, 5044},{  896, 5890},{ 1066, 6636},{ 1241, 7304},
+        { 1428, 7886},{ 1642, 8402},{ 1872, 8871},{ 2128, 9219},
+        { 2380, 9547},{ 2609, 9908},{ 2825,10321},{ 3055,10728},
+        { 3294,11076},{ 3523,11425},{ 3766,11689},{ 4013,11845},
+        { 4254,12022},{ 4506,12209},{ 4759,12383},{ 5013,12637}
+      },
+      /*Y'  qi=14  INTER*/
+      {
+        {   58,  -82},{   38, 1362},{   93, 2914},{  138, 4492},
+        {  171, 5962},{  198, 7289},{  216, 8559},{  234, 9804},
+        {  263,11005},{  306,12143},{  363,13222},{  434,14259},
+        {  523,15255},{  639,16188},{  794,17021},{ 1000,17717},
+        { 1262,18260},{ 1575,18645},{ 1943,18841},{ 2356,18872},
+        { 2782,18802},{ 3194,18682},{ 3576,18559},{ 3923,18447}
+      }
+    },
+    {
+      /*Cb  qi=14  INTRA*/
+      {
+        {    2,    3},{   50,  367},{   91,  741},{  114, 1180},
+        {  134, 1651},{  157, 2131},{  181, 2601},{  208, 3028},
+        {  239, 3391},{  279, 3706},{  322, 4000},{  361, 4309},
+        {  406, 4587},{  445, 4822},{  482, 5067},{  515, 5344},
+        {  546, 5612},{  589, 5821},{  626, 6020},{  655, 6276},
+        {  701, 6523},{  748, 6717},{  796, 6876},{  815, 7151}
+      },
+      /*Cb  qi=14  INTER*/
+      {
+        {   80,  -43},{   68,  301},{   56,  644},{   50, 1004},
+        {   54, 1412},{   63, 1836},{   75, 2253},{   87, 2670},
+        {   94, 3083},{   98, 3487},{  101, 3885},{  103, 4271},
+        {  106, 4645},{  107, 5004},{  108, 5358},{  109, 5705},
+        {  112, 6047},{  115, 6388},{  118, 6731},{  121, 7081},
+        {  126, 7421},{  129, 7747},{  132, 8076},{  137, 8419}
+      }
+    },
+    {
+      /*Cr  qi=14  INTRA*/
+      {
+        {    3,    6},{   45,  375},{   85,  762},{  116, 1226},
+        {  138, 1700},{  158, 2163},{  180, 2602},{  206, 2985},
+        {  236, 3333},{  270, 3639},{  310, 3956},{  359, 4258},
+        {  397, 4524},{  430, 4802},{  478, 5068},{  527, 5316},
+        {  572, 5560},{  613, 5802},{  654, 6012},{  699, 6216},
+        {  734, 6489},{  755, 6707},{  775, 6898},{  841, 7111}
+      },
+      /*Cr  qi=14  INTER*/
+      {
+        {   78,    0},{   59,  322},{   46,  649},{   51, 1016},
+        {   58, 1422},{   68, 1839},{   81, 2253},{   90, 2666},
+        {   95, 3080},{   98, 3486},{  101, 3881},{  102, 4268},
+        {  102, 4644},{  103, 5017},{  105, 5382},{  106, 5743},
+        {  108, 6093},{  112, 6442},{  118, 6791},{  124, 7130},
+        {  127, 7463},{  133, 7784},{  138, 8085},{  142, 8395}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=15  INTRA*/
+      {
+        {  111,  -66},{  197, 1538},{  370, 2949},{  579, 4050},
+        {  762, 4968},{  933, 5798},{ 1112, 6520},{ 1299, 7161},
+        { 1497, 7725},{ 1723, 8219},{ 1967, 8654},{ 2234, 8990},
+        { 2499, 9302},{ 2740, 9637},{ 2968,10039},{ 3215,10414},
+        { 3473,10709},{ 3721,11015},{ 3971,11270},{ 4228,11402},
+        { 4487,11543},{ 4752,11707},{ 5011,11871},{ 5290,12099}
+      },
+      /*Y'  qi=15  INTER*/
+      {
+        {   59, -113},{   37, 1349},{   95, 2904},{  139, 4478},
+        {  174, 5929},{  201, 7244},{  220, 8505},{  241, 9736},
+        {  275,10922},{  327,12040},{  395,13097},{  477,14114},
+        {  585,15071},{  730,15947},{  917,16714},{ 1162,17326},
+        { 1468,17770},{ 1833,18029},{ 2251,18111},{ 2694,18068},
+        { 3125,17968},{ 3529,17845},{ 3908,17713},{ 4260,17587}
+      }
+    },
+    {
+      /*Cb  qi=15  INTRA*/
+      {
+        {    2,    3},{   51,  367},{   94,  741},{  120, 1180},
+        {  140, 1651},{  160, 2129},{  184, 2591},{  213, 3010},
+        {  246, 3371},{  289, 3680},{  335, 3969},{  374, 4274},
+        {  418, 4546},{  460, 4783},{  498, 5019},{  532, 5280},
+        {  565, 5553},{  608, 5765},{  647, 5958},{  683, 6193},
+        {  732, 6433},{  782, 6620},{  832, 6769},{  848, 7027}
+      },
+      /*Cb  qi=15  INTER*/
+      {
+        {   71,  -52},{   63,  296},{   54,  644},{   50, 1010},
+        {   53, 1417},{   64, 1837},{   77, 2253},{   88, 2666},
+        {   95, 3079},{   98, 3487},{  100, 3882},{  103, 4264},
+        {  106, 4633},{  108, 4991},{  109, 5343},{  109, 5693},
+        {  112, 6038},{  114, 6371},{  119, 6709},{  123, 7051},
+        {  125, 7385},{  130, 7716},{  135, 8050},{  140, 8374}
+      }
+    },
+    {
+      /*Cr  qi=15  INTRA*/
+      {
+        {    2,    6},{   47,  375},{   87,  763},{  119, 1225},
+        {  143, 1699},{  162, 2158},{  185, 2595},{  213, 2971},
+        {  246, 3315},{  279, 3618},{  320, 3920},{  372, 4210},
+        {  409, 4480},{  446, 4756},{  496, 5017},{  542, 5263},
+        {  590, 5487},{  639, 5721},{  687, 5923},{  724, 6132},
+        {  753, 6417},{  781, 6622},{  805, 6806},{  856, 6977}
+      },
+      /*Cr  qi=15  INTER*/
+      {
+        {   71,    3},{   61,  326},{   52,  651},{   50, 1017},
+        {   58, 1422},{   69, 1837},{   82, 2251},{   90, 2668},
+        {   95, 3080},{   98, 3484},{  101, 3877},{  102, 4257},
+        {  102, 4632},{  101, 5005},{  103, 5370},{  106, 5733},
+        {  110, 6082},{  116, 6424},{  120, 6774},{  124, 7106},
+        {  130, 7427},{  135, 7748},{  141, 8052},{  147, 8333}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=16  INTRA*/
+      {
+        {  114,  -63},{  206, 1525},{  396, 2887},{  618, 3945},
+        {  816, 4832},{ 1002, 5626},{ 1196, 6319},{ 1401, 6923},
+        { 1616, 7458},{ 1857, 7928},{ 2121, 8334},{ 2405, 8645},
+        { 2685, 8934},{ 2938, 9255},{ 3175, 9638},{ 3433, 9990},
+        { 3707,10263},{ 3958,10577},{ 4218,10807},{ 4488,10906},
+        { 4760,11028},{ 5037,11148},{ 5306,11286},{ 5625,11463}
+      },
+      /*Y'  qi=16  INTER*/
+      {
+        {   69, -153},{   39, 1348},{   98, 2894},{  144, 4448},
+        {  181, 5872},{  209, 7167},{  228, 8422},{  254, 9644},
+        {  297,10810},{  359,11908},{  438,12944},{  539,13930},
+        {  672,14842},{  850,15650},{ 1085,16318},{ 1391,16793},
+        { 1769,17082},{ 2200,17198},{ 2659,17174},{ 3116,17072},
+        { 3547,16948},{ 3943,16819},{ 4299,16701},{ 4611,16644}
+      }
+    },
+    {
+      /*Cb  qi=16  INTRA*/
+      {
+        {    3,    4},{   54,  367},{   97,  742},{  122, 1181},
+        {  143, 1651},{  168, 2123},{  197, 2575},{  226, 2985},
+        {  263, 3338},{  314, 3631},{  367, 3903},{  409, 4200},
+        {  453, 4468},{  491, 4703},{  528, 4932},{  566, 5188},
+        {  601, 5459},{  647, 5672},{  693, 5844},{  734, 6058},
+        {  784, 6305},{  836, 6460},{  882, 6602},{  905, 6891}
+      },
+      /*Cb  qi=16  INTER*/
+      {
+        {   75,  -64},{   67,  292},{   56,  645},{   51, 1016},
+        {   54, 1421},{   66, 1842},{   79, 2257},{   89, 2670},
+        {   95, 3082},{   98, 3488},{  101, 3879},{  104, 4258},
+        {  106, 4623},{  108, 4974},{  109, 5321},{  113, 5664},
+        {  116, 6001},{  117, 6341},{  123, 6677},{  128, 7004},
+        {  130, 7336},{  136, 7671},{  143, 7996},{  148, 8310}
+      }
+    },
+    {
+      /*Cr  qi=16  INTRA*/
+      {
+        {    4,    7},{   50,  375},{   90,  763},{  124, 1225},
+        {  148, 1698},{  168, 2154},{  195, 2582},{  227, 2948},
+        {  263, 3279},{  302, 3575},{  343, 3865},{  394, 4137},
+        {  439, 4402},{  482, 4672},{  533, 4925},{  579, 5165},
+        {  626, 5382},{  675, 5616},{  725, 5812},{  769, 5991},
+        {  810, 6242},{  848, 6430},{  868, 6615},{  944, 6732}
+      },
+      /*Cr  qi=16  INTER*/
+      {
+        {   78,   11},{   62,  327},{   49,  650},{   50, 1025},
+        {   59, 1431},{   72, 1841},{   83, 2253},{   90, 2671},
+        {   95, 3084},{   98, 3487},{  100, 3879},{  101, 4254},
+        {  102, 4625},{  103, 4994},{  106, 5355},{  108, 5708},
+        {  111, 6058},{  115, 6400},{  121, 6733},{  128, 7058},
+        {  134, 7374},{  140, 7691},{  146, 7993},{  146, 8317}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=17  INTRA*/
+      {
+        {  112,  -59},{  210, 1515},{  409, 2850},{  640, 3882},
+        {  844, 4748},{ 1038, 5529},{ 1240, 6206},{ 1452, 6803},
+        { 1676, 7330},{ 1925, 7792},{ 2194, 8201},{ 2483, 8512},
+        { 2766, 8801},{ 3027, 9121},{ 3279, 9482},{ 3548, 9810},
+        { 3825,10069},{ 4088,10345},{ 4362,10544},{ 4638,10644},
+        { 4915,10744},{ 5196,10850},{ 5471,10981},{ 5802,11136}
+      },
+      /*Y'  qi=17  INTER*/
+      {
+        {   70, -147},{   45, 1349},{  106, 2894},{  155, 4425},
+        {  195, 5818},{  225, 7099},{  247, 8348},{  278, 9565},
+        {  328,10717},{  399,11794},{  491,12807},{  609,13760},
+        {  766,14623},{  984,15349},{ 1274,15902},{ 1642,16256},
+        { 2082,16411},{ 2563,16409},{ 3048,16315},{ 3508,16194},
+        { 3924,16064},{ 4306,15938},{ 4656,15828},{ 4966,15733}
+      }
+    },
+    {
+      /*Cb  qi=17  INTRA*/
+      {
+        {    3,    4},{   57,  367},{  101,  742},{  126, 1182},
+        {  148, 1650},{  175, 2118},{  207, 2565},{  241, 2966},
+        {  279, 3307},{  331, 3588},{  389, 3845},{  435, 4132},
+        {  474, 4408},{  517, 4641},{  560, 4869},{  602, 5122},
+        {  638, 5389},{  672, 5610},{  716, 5787},{  758, 6002},
+        {  817, 6226},{  869, 6393},{  916, 6530},{  950, 6799}
+      },
+      /*Cb  qi=17  INTER*/
+      {
+        {  105,  -65},{   86,  288},{   66,  638},{   54, 1014},
+        {   59, 1427},{   71, 1844},{   86, 2257},{   95, 2668},
+        {  100, 3075},{  103, 3476},{  106, 3867},{  110, 4241},
+        {  112, 4598},{  114, 4948},{  117, 5294},{  121, 5633},
+        {  123, 5968},{  126, 6301},{  131, 6637},{  136, 6968},
+        {  144, 7287},{  152, 7606},{  158, 7931},{  162, 8262}
+      }
+    },
+    {
+      /*Cr  qi=17  INTRA*/
+      {
+        {    4,    6},{   55,  376},{   97,  765},{  128, 1226},
+        {  152, 1696},{  175, 2144},{  204, 2568},{  241, 2928},
+        {  282, 3250},{  323, 3530},{  368, 3811},{  420, 4089},
+        {  463, 4347},{  505, 4609},{  562, 4860},{  609, 5094},
+        {  655, 5303},{  709, 5535},{  759, 5740},{  803, 5913},
+        {  844, 6153},{  879, 6350},{  905, 6527},{  972, 6637}
+      },
+      /*Cr  qi=17  INTER*/
+      {
+        {   88,    8},{   68,  330},{   51,  653},{   54, 1028},
+        {   65, 1433},{   77, 1845},{   89, 2257},{   96, 2669},
+        {  100, 3081},{  102, 3481},{  105, 3867},{  106, 4245},
+        {  108, 4613},{  110, 4971},{  112, 5328},{  115, 5679},
+        {  120, 6019},{  127, 6355},{  133, 6686},{  140, 7007},
+        {  149, 7316},{  158, 7618},{  166, 7924},{  170, 8232}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=18  INTRA*/
+      {
+        {  122,  -58},{  216, 1506},{  425, 2815},{  665, 3822},
+        {  882, 4666},{ 1088, 5425},{ 1301, 6084},{ 1529, 6653},
+        { 1766, 7162},{ 2026, 7611},{ 2312, 7987},{ 2612, 8278},
+        { 2913, 8551},{ 3196, 8840},{ 3454, 9184},{ 3734, 9490},
+        { 4030, 9725},{ 4305, 9973},{ 4585,10162},{ 4864,10251},
+        { 5150,10324},{ 5443,10420},{ 5727,10536},{ 6053,10682}
+      },
+      /*Y'  qi=18  INTER*/
+      {
+        {   66, -143},{   47, 1351},{  108, 2886},{  158, 4401},
+        {  200, 5775},{  232, 7044},{  256, 8288},{  292, 9493},
+        {  351,10625},{  434,11679},{  541,12665},{  681,13578},
+        {  875,14379},{ 1136,15025},{ 1483,15475},{ 1914,15709},
+        { 2399,15767},{ 2907,15699},{ 3400,15579},{ 3852,15453},
+        { 4259,15332},{ 4630,15221},{ 4976,15121},{ 5294,15061}
+      }
+    },
+    {
+      /*Cb  qi=18  INTRA*/
+      {
+        {    2,    3},{   61,  367},{  107,  743},{  131, 1182},
+        {  155, 1648},{  183, 2110},{  220, 2542},{  260, 2927},
+        {  303, 3265},{  359, 3540},{  416, 3785},{  462, 4063},
+        {  506, 4334},{  553, 4567},{  595, 4797},{  636, 5049},
+        {  676, 5304},{  717, 5516},{  759, 5698},{  801, 5904},
+        {  861, 6133},{  911, 6311},{  962, 6443},{ 1021, 6645}
+      },
+      /*Cb  qi=18  INTER*/
+      {
+        {  126,    5},{   95,  326},{   66,  643},{   55, 1015},
+        {   60, 1427},{   73, 1843},{   87, 2256},{   96, 2667},
+        {  101, 3073},{  104, 3470},{  108, 3853},{  111, 4226},
+        {  114, 4584},{  117, 4928},{  119, 5274},{  122, 5612},
+        {  126, 5942},{  130, 6271},{  136, 6606},{  141, 6931},
+        {  148, 7247},{  156, 7568},{  164, 7891},{  173, 8211}
+      }
+    },
+    {
+      /*Cr  qi=18  INTRA*/
+      {
+        {    4,    6},{   59,  376},{  104,  765},{  133, 1226},
+        {  156, 1692},{  184, 2136},{  218, 2548},{  260, 2893},
+        {  308, 3204},{  348, 3481},{  397, 3751},{  448, 4024},
+        {  490, 4281},{  541, 4523},{  593, 4776},{  634, 5022},
+        {  685, 5236},{  748, 5455},{  812, 5638},{  856, 5818},
+        {  891, 6048},{  928, 6230},{  961, 6405},{ 1055, 6449}
+      },
+      /*Cr  qi=18  INTER*/
+      {
+        {   81,   34},{   68,  342},{   57,  652},{   59, 1027},
+        {   67, 1439},{   80, 1848},{   91, 2257},{   97, 2670},
+        {  100, 3076},{  103, 3473},{  106, 3857},{  108, 4231},
+        {  109, 4599},{  110, 4958},{  113, 5307},{  119, 5650},
+        {  125, 5991},{  130, 6325},{  138, 6651},{  147, 6971},
+        {  153, 7278},{  162, 7578},{  172, 7874},{  177, 8156}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=19  INTRA*/
+      {
+        {  128,  -55},{  228, 1495},{  448, 2775},{  699, 3758},
+        {  931, 4571},{ 1154, 5296},{ 1386, 5914},{ 1636, 6450},
+        { 1894, 6930},{ 2177, 7342},{ 2479, 7698},{ 2792, 7976},
+        { 3099, 8235},{ 3392, 8517},{ 3658, 8853},{ 3938, 9155},
+        { 4242, 9371},{ 4527, 9605},{ 4810, 9781},{ 5089, 9853},
+        { 5378, 9920},{ 5674,10009},{ 5972,10110},{ 6336,10196}
+      },
+      /*Y'  qi=19  INTER*/
+      {
+        {   69, -147},{   49, 1353},{  111, 2883},{  162, 4381},
+        {  205, 5737},{  237, 6996},{  264, 8232},{  307, 9421},
+        {  376,10534},{  472,11567},{  596,12525},{  761,13395},
+        {  990,14130},{ 1298,14694},{ 1695,15053},{ 2172,15195},
+        { 2696,15173},{ 3213,15075},{ 3696,14948},{ 4141,14829},
+        { 4541,14721},{ 4910,14609},{ 5245,14506},{ 5536,14399}
+      }
+    },
+    {
+      /*Cb  qi=19  INTRA*/
+      {
+        {    3,    3},{   61,  367},{  109,  743},{  135, 1182},
+        {  161, 1646},{  191, 2101},{  229, 2524},{  273, 2898},
+        {  318, 3221},{  376, 3490},{  436, 3731},{  487, 3994},
+        {  539, 4251},{  584, 4485},{  621, 4721},{  664, 4967},
+        {  709, 5225},{  752, 5431},{  801, 5595},{  846, 5796},
+        {  912, 6011},{  959, 6193},{ 1015, 6321},{ 1121, 6504}
+      },
+      /*Cb  qi=19  INTER*/
+      {
+        {  126,    4},{   97,  329},{   69,  649},{   56, 1017},
+        {   61, 1432},{   74, 1846},{   88, 2255},{   98, 2663},
+        {  103, 3065},{  106, 3460},{  110, 3844},{  114, 4211},
+        {  117, 4564},{  120, 4911},{  122, 5253},{  125, 5588},
+        {  129, 5916},{  135, 6241},{  142, 6567},{  149, 6885},
+        {  155, 7206},{  163, 7527},{  174, 7843},{  188, 8145}
+      }
+    },
+    {
+      /*Cr  qi=19  INTRA*/
+      {
+        {    5,    6},{   61,  376},{  106,  765},{  135, 1225},
+        {  160, 1689},{  192, 2126},{  229, 2531},{  271, 2869},
+        {  321, 3168},{  370, 3433},{  421, 3704},{  476, 3965},
+        {  520, 4212},{  572, 4452},{  629, 4691},{  671, 4939},
+        {  724, 5152},{  792, 5347},{  858, 5510},{  895, 5696},
+        {  939, 5905},{  991, 6056},{ 1027, 6244},{ 1127, 6333}
+      },
+      /*Cr  qi=19  INTER*/
+      {
+        {   80,   45},{   66,  344},{   55,  654},{   56, 1030},
+        {   66, 1440},{   80, 1850},{   91, 2259},{   98, 2668},
+        {  102, 3072},{  104, 3466},{  107, 3845},{  109, 4215},
+        {  110, 4578},{  112, 4933},{  116, 5283},{  122, 5625},
+        {  129, 5963},{  136, 6287},{  143, 6611},{  151, 6927},
+        {  160, 7229},{  170, 7528},{  181, 7818},{  191, 8092}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=20  INTRA*/
+      {
+        {  129,  -50},{  238, 1481},{  469, 2728},{  730, 3684},
+        {  974, 4473},{ 1213, 5171},{ 1463, 5763},{ 1729, 6281},
+        { 2002, 6744},{ 2299, 7146},{ 2613, 7492},{ 2940, 7746},
+        { 3265, 7978},{ 3571, 8228},{ 3853, 8543},{ 4156, 8815},
+        { 4476, 9001},{ 4775, 9218},{ 5070, 9373},{ 5352, 9446},
+        { 5649, 9510},{ 5956, 9580},{ 6268, 9660},{ 6647, 9705}
+      },
+      /*Y'  qi=20  INTER*/
+      {
+        {   64,  -93},{   52, 1340},{  116, 2862},{  170, 4344},
+        {  216, 5678},{  249, 6928},{  281, 8155},{  333, 9326},
+        {  418,10410},{  533,11411},{  683,12329},{  890,13127},
+        { 1183,13750},{ 1579,14162},{ 2066,14357},{ 2611,14370},
+        { 3159,14284},{ 3675,14167},{ 4142,14053},{ 4568,13953},
+        { 4961,13852},{ 5320,13755},{ 5649,13675},{ 5933,13610}
+      }
+    },
+    {
+      /*Cb  qi=20  INTRA*/
+      {
+        {    3,    3},{   62,  367},{  112,  743},{  140, 1183},
+        {  165, 1646},{  196, 2099},{  235, 2517},{  284, 2883},
+        {  334, 3198},{  393, 3460},{  457, 3690},{  509, 3945},
+        {  560, 4198},{  605, 4435},{  647, 4658},{  699, 4888},
+        {  742, 5155},{  788, 5350},{  835, 5517},{  880, 5730},
+        {  956, 5914},{ 1007, 6060},{ 1053, 6199},{ 1158, 6358}
+      },
+      /*Cb  qi=20  INTER*/
+      {
+        {  128,   -6},{   96,  322},{   66,  653},{   54, 1025},
+        {   63, 1431},{   79, 1844},{   91, 2256},{   99, 2665},
+        {  104, 3065},{  107, 3455},{  111, 3831},{  115, 4189},
+        {  120, 4539},{  123, 4885},{  126, 5219},{  130, 5548},
+        {  135, 5876},{  141, 6199},{  149, 6519},{  156, 6837},
+        {  166, 7153},{  179, 7468},{  189, 7784},{  194, 8102}
+      }
+    },
+    {
+      /*Cr  qi=20  INTRA*/
+      {
+        {    4,    6},{   63,  376},{  109,  765},{  139, 1225},
+        {  165, 1689},{  199, 2124},{  239, 2523},{  285, 2852},
+        {  340, 3140},{  388, 3398},{  438, 3662},{  499, 3914},
+        {  547, 4155},{  596, 4392},{  652, 4634},{  699, 4877},
+        {  759, 5074},{  824, 5257},{  883, 5428},{  936, 5589},
+        {  986, 5790},{ 1030, 5960},{ 1074, 6119},{ 1172, 6191}
+      },
+      /*Cr  qi=20  INTER*/
+      {
+        {   92,   40},{   70,  345},{   55,  658},{   57, 1034},
+        {   69, 1441},{   84, 1852},{   94, 2261},{   98, 2669},
+        {  102, 3074},{  105, 3465},{  107, 3841},{  110, 4206},
+        {  112, 4562},{  116, 4915},{  121, 5260},{  127, 5591},
+        {  134, 5920},{  142, 6246},{  153, 6562},{  163, 6870},
+        {  173, 7170},{  186, 7463},{  198, 7746},{  199, 8030}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=21  INTRA*/
+      {
+        {  130,  -51},{  244, 1476},{  483, 2705},{  756, 3635},
+        { 1013, 4396},{ 1266, 5070},{ 1530, 5647},{ 1806, 6153},
+        { 2093, 6600},{ 2411, 6976},{ 2739, 7299},{ 3079, 7534},
+        { 3422, 7744},{ 3738, 7987},{ 4032, 8274},{ 4348, 8533},
+        { 4675, 8721},{ 4989, 8909},{ 5291, 9051},{ 5577, 9111},
+        { 5879, 9163},{ 6190, 9228},{ 6506, 9286},{ 6899, 9295}
+      },
+      /*Y'  qi=21  INTER*/
+      {
+        {   64,  -56},{   55, 1341},{  119, 2859},{  174, 4324},
+        {  223, 5640},{  258, 6880},{  295, 8096},{  359, 9246},
+        {  460,10302},{  595,11268},{  778,12131},{ 1032,12857},
+        { 1387,13385},{ 1850,13683},{ 2399,13774},{ 2976,13729},
+        { 3527,13619},{ 4034,13504},{ 4492,13401},{ 4912,13291},
+        { 5298,13209},{ 5648,13137},{ 5974,13046},{ 6308,12977}
+      }
+    },
+    {
+      /*Cb  qi=21  INTRA*/
+      {
+        {    4,    3},{   64,  367},{  114,  743},{  141, 1183},
+        {  166, 1645},{  201, 2092},{  247, 2502},{  299, 2856},
+        {  352, 3158},{  413, 3412},{  480, 3642},{  536, 3893},
+        {  588, 4137},{  637, 4367},{  678, 4598},{  725, 4834},
+        {  774, 5083},{  827, 5269},{  883, 5420},{  930, 5633},
+        {  999, 5829},{ 1057, 5959},{ 1113, 6082},{ 1200, 6265}
+      },
+      /*Cb  qi=21  INTER*/
+      {
+        {  109,   -8},{   84,  321},{   62,  654},{   54, 1028},
+        {   64, 1434},{   80, 1847},{   92, 2259},{  100, 2664},
+        {  105, 3060},{  109, 3445},{  114, 3815},{  118, 4172},
+        {  122, 4519},{  126, 4861},{  128, 5194},{  133, 5520},
+        {  139, 5847},{  146, 6169},{  155, 6487},{  166, 6801},
+        {  177, 7114},{  189, 7423},{  201, 7729},{  208, 8035}
+      }
+    },
+    {
+      /*Cr  qi=21  INTRA*/
+      {
+        {    4,    6},{   64,  377},{  111,  766},{  144, 1225},
+        {  174, 1683},{  206, 2114},{  248, 2506},{  302, 2824},
+        {  357, 3099},{  404, 3357},{  455, 3622},{  519, 3867},
+        {  573, 4098},{  625, 4331},{  683, 4571},{  733, 4802},
+        {  793, 4994},{  863, 5173},{  926, 5337},{  978, 5492},
+        { 1030, 5685},{ 1079, 5856},{ 1126, 6027},{ 1217, 6159}
+      },
+      /*Cr  qi=21  INTER*/
+      {
+        {   82,   29},{   67,  341},{   55,  660},{   58, 1038},
+        {   71, 1443},{   85, 1851},{   95, 2258},{   99, 2666},
+        {  103, 3069},{  107, 3456},{  110, 3826},{  112, 4188},
+        {  114, 4544},{  118, 4891},{  124, 5231},{  132, 5567},
+        {  139, 5894},{  148, 6210},{  159, 6520},{  171, 6822},
+        {  185, 7111},{  196, 7403},{  209, 7691},{  225, 7945}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=22  INTRA*/
+      {
+        {  128,  -45},{  254, 1463},{  507, 2662},{  794, 3562},
+        { 1070, 4292},{ 1340, 4941},{ 1622, 5492},{ 1920, 5968},
+        { 2229, 6387},{ 2565, 6742},{ 2911, 7047},{ 3263, 7264},
+        { 3615, 7464},{ 3944, 7689},{ 4258, 7950},{ 4591, 8183},
+        { 4934, 8347},{ 5259, 8517},{ 5573, 8634},{ 5870, 8683},
+        { 6186, 8723},{ 6508, 8762},{ 6831, 8801},{ 7232, 8830}
+      },
+      /*Y'  qi=22  INTER*/
+      {
+        {   77,  -48},{   57, 1343},{  122, 2853},{  180, 4299},
+        {  231, 5597},{  269, 6826},{  314, 8025},{  393, 9150},
+        {  512,10179},{  673,11103},{  894,11908},{ 1207,12542},
+        { 1635,12956},{ 2166,13148},{ 2755,13167},{ 3345,13088},
+        { 3895,12966},{ 4386,12848},{ 4832,12746},{ 5252,12647},
+        { 5634,12563},{ 5978,12497},{ 6299,12412},{ 6633,12338}
+      }
+    },
+    {
+      /*Cb  qi=22  INTRA*/
+      {
+        {    4,    3},{   66,  367},{  122,  744},{  153, 1182},
+        {  177, 1640},{  213, 2080},{  263, 2475},{  323, 2811},
+        {  382, 3103},{  451, 3346},{  522, 3568},{  581, 3814},
+        {  633, 4054},{  674, 4288},{  719, 4523},{  768, 4756},
+        {  823, 4979},{  883, 5162},{  937, 5325},{  996, 5510},
+        { 1070, 5687},{ 1129, 5807},{ 1193, 5929},{ 1311, 6099}
+      },
+      /*Cb  qi=22  INTER*/
+      {
+        {  107,   -5},{   83,  322},{   61,  653},{   55, 1030},
+        {   66, 1436},{   81, 1845},{   94, 2253},{  102, 2656},
+        {  107, 3050},{  111, 3435},{  115, 3804},{  119, 4158},
+        {  124, 4501},{  128, 4835},{  132, 5164},{  138, 5490},
+        {  146, 5812},{  154, 6128},{  163, 6442},{  174, 6754},
+        {  188, 7060},{  205, 7361},{  219, 7662},{  233, 7953}
+      }
+    },
+    {
+      /*Cr  qi=22  INTRA*/
+      {
+        {    4,    6},{   67,  378},{  118,  767},{  151, 1222},
+        {  182, 1675},{  221, 2097},{  269, 2476},{  329, 2774},
+        {  389, 3039},{  444, 3292},{  500, 3545},{  560, 3788},
+        {  615, 4020},{  671, 4251},{  734, 4484},{  781, 4712},
+        {  850, 4887},{  925, 5060},{  981, 5229},{ 1031, 5369},
+        { 1092, 5549},{ 1148, 5715},{ 1200, 5861},{ 1291, 5943}
+      },
+      /*Cr  qi=22  INTER*/
+      {
+        {   88,   34},{   69,  340},{   57,  657},{   60, 1039},
+        {   73, 1445},{   87, 1851},{   96, 2257},{  100, 2662},
+        {  103, 3058},{  107, 3442},{  111, 3812},{  115, 4172},
+        {  118, 4524},{  123, 4864},{  129, 5199},{  136, 5531},
+        {  145, 5855},{  156, 6168},{  170, 6468},{  184, 6765},
+        {  193, 7066},{  207, 7353},{  222, 7628},{  230, 7900}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=23  INTRA*/
+      {
+        {  126,  -40},{  257, 1458},{  521, 2636},{  825, 3501},
+        { 1111, 4207},{ 1391, 4842},{ 1684, 5385},{ 1992, 5858},
+        { 2311, 6277},{ 2653, 6626},{ 3005, 6929},{ 3366, 7134},
+        { 3729, 7311},{ 4071, 7526},{ 4396, 7770},{ 4734, 7986},
+        { 5086, 8131},{ 5421, 8286},{ 5735, 8404},{ 6033, 8456},
+        { 6357, 8486},{ 6682, 8525},{ 7003, 8573},{ 7387, 8604}
+      },
+      /*Y'  qi=23  INTER*/
+      {
+        {   64,  -57},{   60, 1345},{  124, 2853},{  185, 4284},
+        {  239, 5565},{  282, 6783},{  336, 7967},{  429, 9069},
+        {  568,10063},{  758,10943},{ 1028,11679},{ 1407,12216},
+        { 1909,12520},{ 2502,12616},{ 3126,12573},{ 3722,12461},
+        { 4258,12344},{ 4742,12236},{ 5185,12136},{ 5590,12052},
+        { 5970,11980},{ 6315,11901},{ 6631,11826},{ 6954,11769}
+      }
+    },
+    {
+      /*Cb  qi=23  INTRA*/
+      {
+        {    3,    3},{   70,  367},{  124,  744},{  151, 1182},
+        {  181, 1637},{  222, 2071},{  276, 2460},{  343, 2785},
+        {  403, 3072},{  468, 3317},{  542, 3534},{  605, 3773},
+        {  659, 4009},{  703, 4243},{  747, 4479},{  795, 4707},
+        {  852, 4923},{  908, 5105},{  972, 5254},{ 1043, 5423},
+        { 1118, 5594},{ 1172, 5731},{ 1240, 5853},{ 1365, 6005}
+      },
+      /*Cb  qi=23  INTER*/
+      {
+        {  109,  -10},{   87,  325},{   63,  650},{   57, 1031},
+        {   67, 1439},{   83, 1847},{   96, 2253},{  103, 2652},
+        {  109, 3041},{  114, 3421},{  117, 3789},{  122, 4141},
+        {  128, 4480},{  134, 4811},{  139, 5138},{  144, 5463},
+        {  152, 5781},{  161, 6096},{  174, 6404},{  185, 6714},
+        {  198, 7023},{  216, 7320},{  233, 7621},{  245, 7935}
+      }
+    },
+    {
+      /*Cr  qi=23  INTRA*/
+      {
+        {    5,    6},{   70,  379},{  122,  768},{  155, 1222},
+        {  187, 1671},{  231, 2088},{  283, 2459},{  346, 2750},
+        {  411, 3009},{  465, 3261},{  523, 3509},{  585, 3746},
+        {  639, 3980},{  695, 4219},{  754, 4449},{  803, 4671},
+        {  873, 4840},{  953, 5001},{ 1015, 5156},{ 1071, 5286},
+        { 1137, 5464},{ 1191, 5629},{ 1249, 5782},{ 1359, 5885}
+      },
+      /*Cr  qi=23  INTER*/
+      {
+        {   84,   29},{   69,  343},{   58,  660},{   62, 1041},
+        {   75, 1448},{   88, 1853},{   97, 2258},{  102, 2659},
+        {  105, 3050},{  108, 3430},{  113, 3799},{  116, 4155},
+        {  121, 4505},{  126, 4845},{  132, 5176},{  142, 5504},
+        {  153, 5826},{  165, 6133},{  180, 6432},{  197, 6722},
+        {  212, 7005},{  226, 7287},{  244, 7555},{  258, 7828}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=24  INTRA*/
+      {
+        {  125,  -34},{  268, 1444},{  547, 2590},{  866, 3422},
+        { 1172, 4098},{ 1476, 4702},{ 1790, 5222},{ 2117, 5678},
+        { 2453, 6080},{ 2811, 6418},{ 3178, 6700},{ 3552, 6895},
+        { 3928, 7055},{ 4286, 7243},{ 4627, 7477},{ 4981, 7674},
+        { 5344, 7802},{ 5683, 7944},{ 6009, 8043},{ 6313, 8082},
+        { 6633, 8111},{ 6959, 8151},{ 7280, 8197},{ 7660, 8221}
+      },
+      /*Y'  qi=24  INTER*/
+      {
+        {   62,  -63},{   68, 1345},{  134, 2840},{  199, 4245},
+        {  256, 5508},{  304, 6715},{  371, 7880},{  484, 8950},
+        {  652, 9899},{  892,10709},{ 1238,11334},{ 1722,11722},
+        { 2326,11875},{ 2983,11864},{ 3616,11783},{ 4189,11678},
+        { 4707,11570},{ 5178,11476},{ 5617,11395},{ 6017,11319},
+        { 6380,11252},{ 6720,11185},{ 7044,11126},{ 7377,11118}
+      }
+    },
+    {
+      /*Cb  qi=24  INTRA*/
+      {
+        {    4,    3},{   75,  367},{  132,  745},{  159, 1182},
+        {  187, 1634},{  230, 2061},{  289, 2439},{  361, 2753},
+        {  425, 3034},{  492, 3278},{  566, 3490},{  630, 3720},
+        {  686, 3956},{  732, 4190},{  777, 4420},{  829, 4637},
+        {  894, 4840},{  958, 5012},{ 1023, 5155},{ 1090, 5326},
+        { 1165, 5502},{ 1226, 5622},{ 1299, 5717},{ 1408, 5887}
+      },
+      /*Cb  qi=24  INTER*/
+      {
+        {  110,   35},{   92,  337},{   70,  651},{   63, 1033},
+        {   74, 1440},{   91, 1846},{  102, 2248},{  109, 2644},
+        {  114, 3031},{  120, 3404},{  127, 3762},{  133, 4109},
+        {  138, 4445},{  144, 4772},{  151, 5094},{  159, 5411},
+        {  168, 5728},{  180, 6037},{  195, 6338},{  210, 6640},
+        {  227, 6944},{  249, 7236},{  272, 7528},{  299, 7809}
+      }
+    },
+    {
+      /*Cr  qi=24  INTRA*/
+      {
+        {    5,    6},{   72,  380},{  124,  770},{  158, 1222},
+        {  195, 1668},{  240, 2079},{  297, 2438},{  367, 2715},
+        {  433, 2966},{  488, 3218},{  549, 3467},{  609, 3701},
+        {  664, 3935},{  728, 4165},{  792, 4379},{  845, 4586},
+        {  917, 4744},{  995, 4898},{ 1063, 5049},{ 1120, 5187},
+        { 1190, 5359},{ 1249, 5522},{ 1304, 5672},{ 1397, 5806}
+      },
+      /*Cr  qi=24  INTER*/
+      {
+        {   91,   56},{   73,  353},{   61,  664},{   66, 1045},
+        {   80, 1449},{   95, 1851},{  103, 2250},{  107, 2648},
+        {  111, 3038},{  116, 3413},{  120, 3774},{  124, 4128},
+        {  130, 4471},{  138, 4802},{  145, 5130},{  156, 5453},
+        {  171, 5764},{  187, 6061},{  204, 6355},{  220, 6643},
+        {  238, 6923},{  254, 7204},{  275, 7475},{  289, 7752}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=25  INTRA*/
+      {
+        {  125,  -28},{  285, 1426},{  582, 2540},{  917, 3351},
+        { 1244, 3997},{ 1569, 4570},{ 1903, 5071},{ 2258, 5498},
+        { 2626, 5866},{ 3002, 6182},{ 3382, 6448},{ 3770, 6623},
+        { 4162, 6760},{ 4528, 6934},{ 4882, 7144},{ 5249, 7328},
+        { 5610, 7453},{ 5958, 7578},{ 6291, 7672},{ 6597, 7708},
+        { 6928, 7715},{ 7258, 7737},{ 7575, 7781},{ 7950, 7829}
+      },
+      /*Y'  qi=25  INTER*/
+      {
+        {   64,  -16},{   72, 1348},{  139, 2832},{  206, 4218},
+        {  268, 5465},{  322, 6659},{  403, 7803},{  540, 8838},
+        {  747, 9734},{ 1044,10465},{ 1473,10981},{ 2048,11249},
+        { 2717,11311},{ 3397,11257},{ 4025,11161},{ 4589,11052},
+        { 5099,10947},{ 5560,10859},{ 5989,10786},{ 6389,10717},
+        { 6753,10652},{ 7078,10592},{ 7389,10535},{ 7697,10460}
+      }
+    },
+    {
+      /*Cb  qi=25  INTRA*/
+      {
+        {    3,    3},{   78,  368},{  133,  745},{  159, 1180},
+        {  193, 1627},{  242, 2046},{  304, 2411},{  381, 2714},
+        {  456, 2983},{  527, 3224},{  598, 3437},{  667, 3655},
+        {  726, 3888},{  776, 4117},{  826, 4333},{  883, 4543},
+        {  954, 4727},{ 1019, 4878},{ 1095, 5014},{ 1171, 5187},
+        { 1255, 5342},{ 1319, 5458},{ 1396, 5546},{ 1536, 5678}
+      },
+      /*Cb  qi=25  INTER*/
+      {
+        {  117,   32},{   89,  342},{   67,  660},{   64, 1037},
+        {   77, 1441},{   93, 1845},{  105, 2243},{  113, 2633},
+        {  120, 3016},{  125, 3387},{  131, 3739},{  137, 4080},
+        {  144, 4416},{  152, 4741},{  160, 5057},{  169, 5369},
+        {  180, 5680},{  193, 5990},{  209, 6294},{  227, 6594},
+        {  249, 6888},{  269, 7180},{  294, 7467},{  317, 7768}
+      }
+    },
+    {
+      /*Cr  qi=25  INTRA*/
+      {
+        {    6,    6},{   74,  380},{  129,  770},{  165, 1220},
+        {  201, 1658},{  253, 2061},{  315, 2410},{  388, 2676},
+        {  462, 2920},{  523, 3166},{  584, 3404},{  647, 3637},
+        {  701, 3870},{  769, 4086},{  838, 4296},{  898, 4491},
+        {  980, 4627},{ 1065, 4759},{ 1126, 4920},{ 1187, 5058},
+        { 1283, 5180},{ 1347, 5332},{ 1404, 5475},{ 1527, 5534}
+      },
+      /*Cr  qi=25  INTER*/
+      {
+        {   92,   41},{   75,  347},{   64,  664},{   70, 1045},
+        {   85, 1448},{   98, 1849},{  105, 2245},{  110, 2637},
+        {  115, 3023},{  120, 3395},{  126, 3753},{  131, 4102},
+        {  136, 4439},{  145, 4768},{  156, 5094},{  168, 5410},
+        {  184, 5717},{  203, 6010},{  221, 6300},{  239, 6577},
+        {  262, 6847},{  282, 7123},{  303, 7390},{  322, 7665}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=26  INTRA*/
+      {
+        {  130,  -24},{  292, 1423},{  594, 2525},{  943, 3307},
+        { 1289, 3921},{ 1633, 4467},{ 1991, 4943},{ 2368, 5348},
+        { 2753, 5696},{ 3148, 5991},{ 3545, 6247},{ 3942, 6415},
+        { 4342, 6535},{ 4726, 6690},{ 5093, 6883},{ 5466, 7047},
+        { 5840, 7159},{ 6202, 7274},{ 6545, 7351},{ 6855, 7375},
+        { 7186, 7384},{ 7517, 7416},{ 7840, 7447},{ 8238, 7450}
+      },
+      /*Y'  qi=26  INTER*/
+      {
+        {   52,   16},{   75, 1336},{  143, 2815},{  213, 4191},
+        {  278, 5427},{  339, 6611},{  436, 7734},{  600, 8732},
+        {  843, 9579},{ 1195,10243},{ 1702,10660},{ 2355,10825},
+        { 3070,10820},{ 3755,10743},{ 4372,10643},{ 4925,10538},
+        { 5426,10440},{ 5882,10354},{ 6296,10290},{ 6686,10224},
+        { 7049,10163},{ 7380,10113},{ 7672,10062},{ 7937,10021}
+      }
+    },
+    {
+      /*Cb  qi=26  INTRA*/
+      {
+        {    4,    3},{   79,  368},{  138,  745},{  167, 1180},
+        {  200, 1623},{  252, 2034},{  322, 2389},{  403, 2682},
+        {  480, 2941},{  558, 3176},{  631, 3393},{  700, 3608},
+        {  766, 3825},{  819, 4046},{  868, 4265},{  926, 4472},
+        { 1002, 4645},{ 1070, 4800},{ 1151, 4924},{ 1242, 5063},
+        { 1325, 5221},{ 1393, 5338},{ 1464, 5431},{ 1595, 5559}
+      },
+      /*Cb  qi=26  INTER*/
+      {
+        {   98,   33},{   83,  343},{   65,  662},{   65, 1037},
+        {   80, 1437},{   96, 1839},{  107, 2238},{  115, 2628},
+        {  122, 3007},{  128, 3373},{  134, 3722},{  142, 4060},
+        {  149, 4390},{  158, 4713},{  167, 5029},{  178, 5341},
+        {  191, 5647},{  208, 5948},{  227, 6244},{  247, 6539},
+        {  269, 6833},{  295, 7114},{  328, 7388},{  369, 7658}
+      }
+    },
+    {
+      /*Cr  qi=26  INTRA*/
+      {
+        {    5,    6},{   75,  380},{  133,  769},{  172, 1217},
+        {  212, 1652},{  266, 2048},{  333, 2384},{  412, 2643},
+        {  490, 2880},{  552, 3124},{  616, 3365},{  681, 3594},
+        {  739, 3816},{  810, 4024},{  880, 4224},{  945, 4405},
+        { 1029, 4538},{ 1114, 4674},{ 1183, 4822},{ 1254, 4946},
+        { 1346, 5063},{ 1417, 5201},{ 1478, 5345},{ 1597, 5411}
+      },
+      /*Cr  qi=26  INTER*/
+      {
+        {   97,   29},{   75,  342},{   62,  667},{   70, 1047},
+        {   87, 1447},{  100, 1846},{  107, 2242},{  113, 2633},
+        {  118, 3016},{  123, 3382},{  128, 3737},{  135, 4082},
+        {  142, 4417},{  151, 4746},{  162, 5066},{  176, 5377},
+        {  194, 5679},{  217, 5963},{  239, 6244},{  260, 6522},
+        {  284, 6789},{  309, 7052},{  335, 7313},{  355, 7582}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=27  INTRA*/
+      {
+        {  118,  -10},{  308, 1404},{  630, 2473},{  997, 3227},
+        { 1360, 3819},{ 1719, 4354},{ 2086, 4829},{ 2470, 5233},
+        { 2863, 5576},{ 3267, 5870},{ 3677, 6117},{ 4085, 6268},
+        { 4499, 6376},{ 4888, 6521},{ 5257, 6705},{ 5638, 6865},
+        { 6020, 6962},{ 6394, 7056},{ 6744, 7130},{ 7051, 7158},
+        { 7386, 7164},{ 7717, 7185},{ 8042, 7209},{ 8444, 7206}
+      },
+      /*Y'  qi=27  INTER*/
+      {
+        {   54,   19},{   77, 1333},{  147, 2806},{  221, 4166},
+        {  290, 5390},{  360, 6564},{  474, 7665},{  664, 8630},
+        {  949, 9423},{ 1370,10002},{ 1958,10323},{ 2670,10414},
+        { 3406,10375},{ 4086,10285},{ 4691,10182},{ 5233,10085},
+        { 5724, 9994},{ 6169, 9918},{ 6582, 9863},{ 6962, 9813},
+        { 7316, 9759},{ 7645, 9707},{ 7948, 9660},{ 8262, 9623}
+      }
+    },
+    {
+      /*Cb  qi=27  INTRA*/
+      {
+        {    4,    3},{   79,  368},{  137,  745},{  166, 1180},
+        {  200, 1622},{  253, 2030},{  324, 2381},{  407, 2671},
+        {  487, 2925},{  567, 3156},{  640, 3372},{  712, 3580},
+        {  782, 3792},{  833, 4015},{  887, 4227},{  954, 4422},
+        { 1031, 4592},{ 1103, 4738},{ 1187, 4856},{ 1280, 4990},
+        { 1371, 5135},{ 1442, 5244},{ 1520, 5321},{ 1684, 5398}
+      },
+      /*Cb  qi=27  INTER*/
+      {
+        {  113,   20},{   90,  338},{   66,  661},{   67, 1034},
+        {   82, 1438},{   97, 1842},{  108, 2238},{  115, 2624},
+        {  123, 3000},{  130, 3361},{  138, 3708},{  146, 4040},
+        {  155, 4367},{  164, 4688},{  174, 4999},{  186, 5306},
+        {  203, 5609},{  222, 5908},{  243, 6202},{  268, 6494},
+        {  295, 6781},{  326, 7058},{  367, 7319},{  420, 7551}
+      }
+    },
+    {
+      /*Cr  qi=27  INTRA*/
+      {
+        {    5,    6},{   75,  380},{  133,  770},{  173, 1217},
+        {  214, 1650},{  268, 2040},{  337, 2375},{  418, 2631},
+        {  496, 2862},{  558, 3104},{  625, 3346},{  692, 3571},
+        {  753, 3786},{  825, 3989},{  896, 4182},{  969, 4352},
+        { 1059, 4479},{ 1144, 4614},{ 1212, 4757},{ 1284, 4871},
+        { 1380, 4982},{ 1457, 5125},{ 1528, 5267},{ 1651, 5346}
+      },
+      /*Cr  qi=27  INTER*/
+      {
+        {   92,   24},{   74,  341},{   61,  669},{   71, 1049},
+        {   88, 1448},{  100, 1849},{  107, 2243},{  113, 2631},
+        {  119, 3010},{  125, 3373},{  131, 3723},{  137, 4064},
+        {  146, 4396},{  159, 4720},{  172, 5033},{  189, 5340},
+        {  210, 5636},{  233, 5920},{  256, 6197},{  282, 6465},
+        {  310, 6730},{  332, 7000},{  359, 7259},{  385, 7515}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=28  INTRA*/
+      {
+        {  116,   -8},{  314, 1400},{  640, 2458},{ 1013, 3197},
+        { 1386, 3768},{ 1762, 4279},{ 2151, 4733},{ 2558, 5117},
+        { 2970, 5442},{ 3393, 5714},{ 3820, 5935},{ 4243, 6069},
+        { 4671, 6161},{ 5074, 6289},{ 5456, 6457},{ 5849, 6598},
+        { 6244, 6689},{ 6632, 6777},{ 6984, 6833},{ 7294, 6855},
+        { 7625, 6862},{ 7961, 6875},{ 8302, 6890},{ 8720, 6883}
+      },
+      /*Y'  qi=28  INTER*/
+      {
+        {   54,    8},{   81, 1333},{  154, 2793},{  231, 4138},
+        {  304, 5352},{  384, 6512},{  519, 7585},{  743, 8508},
+        { 1082, 9236},{ 1587, 9717},{ 2267, 9928},{ 3034, 9944},
+        { 3775, 9878},{ 4438, 9786},{ 5031, 9686},{ 5563, 9601},
+        { 6042, 9523},{ 6481, 9456},{ 6890, 9405},{ 7266, 9356},
+        { 7614, 9313},{ 7933, 9265},{ 8238, 9220},{ 8545, 9193}
+      }
+    },
+    {
+      /*Cb  qi=28  INTRA*/
+      {
+        {    3,    3},{   80,  368},{  138,  746},{  168, 1179},
+        {  208, 1615},{  268, 2014},{  345, 2354},{  432, 2637},
+        {  515, 2884},{  595, 3108},{  669, 3323},{  745, 3533},
+        {  818, 3740},{  876, 3953},{  932, 4160},{ 1003, 4349},
+        { 1088, 4501},{ 1154, 4648},{ 1241, 4768},{ 1349, 4889},
+        { 1441, 5023},{ 1524, 5113},{ 1611, 5187},{ 1783, 5283}
+      },
+      /*Cb  qi=28  INTER*/
+      {
+        {  117,   29},{   91,  341},{   65,  663},{   68, 1038},
+        {   85, 1440},{  100, 1841},{  110, 2234},{  119, 2616},
+        {  127, 2985},{  135, 3342},{  142, 3685},{  151, 4015},
+        {  162, 4337},{  174, 4652},{  186, 4960},{  201, 5264},
+        {  218, 5567},{  239, 5863},{  266, 6149},{  295, 6434},
+        {  328, 6715},{  371, 6976},{  409, 7239},{  460, 7477}
+      }
+    },
+    {
+      /*Cr  qi=28  INTRA*/
+      {
+        {    6,    7},{   79,  381},{  138,  771},{  178, 1215},
+        {  222, 1644},{  285, 2026},{  359, 2347},{  441, 2597},
+        {  521, 2827},{  588, 3066},{  655, 3303},{  725, 3523},
+        {  791, 3728},{  870, 3920},{  950, 4103},{ 1030, 4265},
+        { 1121, 4388},{ 1198, 4520},{ 1266, 4659},{ 1356, 4759},
+        { 1461, 4865},{ 1540, 4993},{ 1619, 5115},{ 1786, 5160}
+      },
+      /*Cr  qi=28  INTER*/
+      {
+        {   96,   18},{   78,  340},{   66,  672},{   74, 1051},
+        {   90, 1450},{  103, 1845},{  110, 2235},{  116, 2619},
+        {  122, 2995},{  129, 3356},{  137, 3702},{  146, 4038},
+        {  156, 4365},{  168, 4684},{  182, 4995},{  203, 5297},
+        {  227, 5588},{  253, 5866},{  282, 6131},{  311, 6394},
+        {  339, 6664},{  366, 6918},{  400, 7171},{  424, 7450}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=29  INTRA*/
+      {
+        {  112,    7},{  334, 1382},{  681, 2410},{ 1081, 3112},
+        { 1484, 3650},{ 1894, 4128},{ 2316, 4547},{ 2749, 4905},
+        { 3188, 5208},{ 3634, 5458},{ 4079, 5666},{ 4517, 5791},
+        { 4952, 5870},{ 5359, 5983},{ 5754, 6137},{ 6165, 6268},
+        { 6568, 6351},{ 6958, 6423},{ 7320, 6471},{ 7638, 6490},
+        { 7979, 6490},{ 8313, 6499},{ 8651, 6517},{ 9085, 6499}
+      },
+      /*Y'  qi=29  INTER*/
+      {
+        {   55,   15},{   85, 1336},{  160, 2780},{  242, 4104},
+        {  323, 5302},{  418, 6443},{  586, 7480},{  859, 8342},
+        { 1278, 8982},{ 1888, 9347},{ 2658, 9457},{ 3457, 9425},
+        { 4192, 9343},{ 4842, 9247},{ 5417, 9162},{ 5935, 9086},
+        { 6404, 9011},{ 6841, 8952},{ 7241, 8907},{ 7609, 8867},
+        { 7953, 8832},{ 8267, 8792},{ 8562, 8740},{ 8836, 8701}
+      }
+    },
+    {
+      /*Cb  qi=29  INTRA*/
+      {
+        {    5,    3},{   84,  368},{  144,  746},{  176, 1175},
+        {  219, 1604},{  285, 1991},{  372, 2318},{  462, 2591},
+        {  546, 2833},{  628, 3058},{  704, 3274},{  788, 3473},
+        {  870, 3664},{  935, 3865},{  995, 4059},{ 1072, 4239},
+        { 1167, 4388},{ 1248, 4518},{ 1334, 4634},{ 1429, 4765},
+        { 1536, 4884},{ 1628, 4964},{ 1716, 5038},{ 1885, 5128}
+      },
+      /*Cb  qi=29  INTER*/
+      {
+        {  126,   25},{   95,  340},{   69,  662},{   71, 1039},
+        {   88, 1440},{  102, 1839},{  113, 2227},{  122, 2604},
+        {  132, 2969},{  141, 3320},{  151, 3659},{  161, 3985},
+        {  172, 4301},{  186, 4612},{  200, 4917},{  219, 5213},
+        {  241, 5509},{  265, 5800},{  296, 6081},{  329, 6360},
+        {  369, 6633},{  414, 6899},{  465, 7148},{  520, 7387}
+      }
+    },
+    {
+      /*Cr  qi=29  INTRA*/
+      {
+        {    6,    7},{   82,  382},{  142,  772},{  185, 1211},
+        {  233, 1632},{  303, 2000},{  388, 2306},{  475, 2550},
+        {  556, 2779},{  627, 3007},{  707, 3237},{  778, 3459},
+        {  843, 3654},{  927, 3834},{ 1012, 4012},{ 1101, 4152},
+        { 1197, 4262},{ 1275, 4399},{ 1359, 4511},{ 1455, 4596},
+        { 1562, 4708},{ 1644, 4833},{ 1719, 4954},{ 1888, 4988}
+      },
+      /*Cr  qi=29  INTER*/
+      {
+        {  101,   28},{   81,  343},{   67,  673},{   75, 1053},
+        {   93, 1450},{  106, 1844},{  113, 2230},{  119, 2610},
+        {  127, 2980},{  135, 3334},{  143, 3676},{  153, 4007},
+        {  165, 4330},{  180, 4645},{  201, 4951},{  224, 5243},
+        {  253, 5522},{  284, 5794},{  314, 6060},{  345, 6322},
+        {  381, 6578},{  419, 6828},{  455, 7073},{  495, 7316}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=30  INTRA*/
+      {
+        {  112,    8},{  335, 1380},{  682, 2401},{ 1083, 3093},
+        { 1489, 3619},{ 1902, 4092},{ 2332, 4511},{ 2777, 4865},
+        { 3231, 5156},{ 3693, 5394},{ 4153, 5585},{ 4605, 5689},
+        { 5049, 5764},{ 5468, 5871},{ 5875, 6004},{ 6295, 6120},
+        { 6706, 6201},{ 7099, 6273},{ 7461, 6311},{ 7785, 6320},
+        { 8128, 6322},{ 8469, 6331},{ 8806, 6342},{ 9220, 6338}
+      },
+      /*Y'  qi=30  INTER*/
+      {
+        {   58,    8},{   90, 1340},{  169, 2771},{  257, 4079},
+        {  345, 5266},{  459, 6387},{  660, 7383},{  990, 8180},
+        { 1496, 8726},{ 2203, 8992},{ 3029, 9038},{ 3833, 8984},
+        { 4549, 8900},{ 5183, 8813},{ 5745, 8735},{ 6250, 8674},
+        { 6715, 8619},{ 7138, 8565},{ 7529, 8528},{ 7899, 8495},
+        { 8234, 8465},{ 8550, 8429},{ 8856, 8395},{ 9160, 8374}
+      }
+    },
+    {
+      /*Cb  qi=30  INTRA*/
+      {
+        {    7,    3},{   88,  369},{  149,  747},{  185, 1175},
+        {  232, 1599},{  304, 1976},{  392, 2293},{  486, 2557},
+        {  573, 2797},{  656, 3027},{  735, 3243},{  819, 3442},
+        {  903, 3629},{  966, 3828},{ 1025, 4027},{ 1105, 4204},
+        { 1201, 4343},{ 1282, 4469},{ 1379, 4575},{ 1486, 4689},
+        { 1588, 4813},{ 1678, 4900},{ 1767, 4969},{ 1911, 5080}
+      },
+      /*Cb  qi=30  INTER*/
+      {
+        {  120,   23},{   96,  336},{   72,  661},{   75, 1043},
+        {   91, 1441},{  105, 1837},{  117, 2221},{  127, 2592},
+        {  137, 2953},{  148, 3301},{  159, 3635},{  170, 3959},
+        {  184, 4271},{  199, 4578},{  216, 4879},{  238, 5175},
+        {  262, 5466},{  294, 5750},{  332, 6027},{  373, 6298},
+        {  421, 6559},{  473, 6805},{  526, 7053},{  587, 7298}
+      }
+    },
+    {
+      /*Cr  qi=30  INTRA*/
+      {
+        {   10,    7},{   89,  384},{  147,  773},{  192, 1211},
+        {  245, 1627},{  322, 1984},{  412, 2280},{  501, 2520},
+        {  583, 2750},{  654, 2982},{  736, 3207},{  810, 3419},
+        {  873, 3614},{  957, 3794},{ 1048, 3965},{ 1139, 4102},
+        { 1237, 4208},{ 1327, 4328},{ 1408, 4448},{ 1496, 4545},
+        { 1604, 4652},{ 1699, 4760},{ 1780, 4877},{ 1937, 4942}
+      },
+      /*Cr  qi=30  INTER*/
+      {
+        {  115,   26},{   89,  342},{   70,  672},{   79, 1055},
+        {   96, 1451},{  108, 1841},{  116, 2222},{  124, 2599},
+        {  132, 2965},{  141, 3316},{  151, 3655},{  163, 3984},
+        {  178, 4301},{  197, 4609},{  219, 4909},{  247, 5195},
+        {  280, 5469},{  317, 5734},{  351, 5991},{  383, 6248},
+        {  423, 6500},{  467, 6744},{  502, 6995},{  558, 7226}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=31  INTRA*/
+      {
+        {  116,   20},{  359, 1361},{  732, 2350},{ 1162, 3010},
+        { 1597, 3507},{ 2042, 3950},{ 2503, 4339},{ 2974, 4670},
+        { 3446, 4951},{ 3922, 5179},{ 4394, 5357},{ 4858, 5454},
+        { 5313, 5519},{ 5734, 5626},{ 6154, 5755},{ 6585, 5859},
+        { 7004, 5928},{ 7408, 5998},{ 7775, 6039},{ 8102, 6048},
+        { 8442, 6051},{ 8790, 6054},{ 9136, 6057},{ 9554, 6041}
+      },
+      /*Y'  qi=31  INTER*/
+      {
+        {   53,   12},{   90, 1340},{  169, 2765},{  259, 4062},
+        {  353, 5236},{  483, 6340},{  713, 7305},{ 1086, 8059},
+        { 1651, 8548},{ 2423, 8751},{ 3288, 8754},{ 4106, 8674},
+        { 4827, 8572},{ 5451, 8482},{ 6007, 8407},{ 6514, 8344},
+        { 6970, 8282},{ 7397, 8225},{ 7795, 8193},{ 8159, 8161},
+        { 8498, 8120},{ 8814, 8093},{ 9127, 8066},{ 9432, 8040}
+      }
+    },
+    {
+      /*Cb  qi=31  INTRA*/
+      {
+        {    7,    3},{   88,  369},{  149,  746},{  185, 1173},
+        {  234, 1595},{  308, 1967},{  399, 2278},{  494, 2537},
+        {  583, 2774},{  669, 2997},{  755, 3204},{  847, 3390},
+        {  936, 3569},{ 1008, 3759},{ 1078, 3942},{ 1162, 4104},
+        { 1262, 4238},{ 1352, 4364},{ 1442, 4470},{ 1557, 4567},
+        { 1676, 4674},{ 1759, 4781},{ 1850, 4853},{ 2043, 4897}
+      },
+      /*Cb  qi=31  INTER*/
+      {
+        {  121,   23},{   96,  335},{   72,  660},{   74, 1043},
+        {   90, 1440},{  105, 1834},{  116, 2217},{  127, 2586},
+        {  138, 2945},{  148, 3293},{  159, 3626},{  172, 3945},
+        {  185, 4256},{  202, 4559},{  223, 4856},{  245, 5150},
+        {  272, 5440},{  306, 5719},{  346, 5989},{  391, 6253},
+        {  443, 6511},{  510, 6743},{  583, 6965},{  651, 7182}
+      }
+    },
+    {
+      /*Cr  qi=31  INTRA*/
+      {
+        {   10,    7},{   88,  384},{  147,  773},{  192, 1209},
+        {  247, 1622},{  326, 1974},{  417, 2262},{  509, 2500},
+        {  596, 2726},{  670, 2949},{  754, 3170},{  836, 3370},
+        {  912, 3548},{  999, 3724},{ 1093, 3888},{ 1198, 4000},
+        { 1304, 4095},{ 1384, 4230},{ 1470, 4347},{ 1577, 4422},
+        { 1696, 4513},{ 1798, 4620},{ 1869, 4746},{ 1991, 4798}
+      },
+      /*Cr  qi=31  INTER*/
+      {
+        {  113,   32},{   88,  345},{   69,  674},{   79, 1055},
+        {   96, 1451},{  108, 1839},{  115, 2218},{  123, 2592},
+        {  132, 2957},{  141, 3308},{  151, 3643},{  163, 3968},
+        {  179, 4285},{  200, 4590},{  225, 4886},{  254, 5169},
+        {  291, 5436},{  330, 5696},{  368, 5951},{  409, 6200},
+        {  452, 6448},{  493, 6695},{  536, 6940},{  571, 7204}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=32  INTRA*/
+      {
+        {  123,   26},{  370, 1356},{  756, 2321},{ 1211, 2944},
+        { 1674, 3408},{ 2148, 3826},{ 2639, 4193},{ 3138, 4504},
+        { 3634, 4765},{ 4133, 4973},{ 4625, 5137},{ 5101, 5225},
+        { 5567, 5274},{ 6002, 5363},{ 6437, 5482},{ 6885, 5566},
+        { 7312, 5625},{ 7723, 5686},{ 8101, 5721},{ 8429, 5732},
+        { 8769, 5728},{ 9120, 5726},{ 9472, 5723},{ 9918, 5700}
+      },
+      /*Y'  qi=32  INTER*/
+      {
+        {   54,   -3},{   95, 1343},{  179, 2750},{  276, 4027},
+        {  382, 5185},{  543, 6256},{  830, 7161},{ 1301, 7815},
+        { 2003, 8172},{ 2883, 8266},{ 3779, 8217},{ 4578, 8127},
+        { 5274, 8035},{ 5886, 7952},{ 6430, 7887},{ 6929, 7835},
+        { 7380, 7779},{ 7796, 7737},{ 8190, 7705},{ 8552, 7672},
+        { 8896, 7640},{ 9210, 7612},{ 9510, 7589},{ 9746, 7552}
+      }
+    },
+    {
+      /*Cb  qi=32  INTRA*/
+      {
+        {    6,    3},{   89,  369},{  153,  746},{  193, 1167},
+        {  247, 1577},{  330, 1935},{  429, 2236},{  528, 2494},
+        {  620, 2732},{  712, 2948},{  801, 3146},{  898, 3325},
+        {  999, 3489},{ 1078, 3664},{ 1155, 3832},{ 1251, 3985},
+        { 1360, 4115},{ 1451, 4236},{ 1549, 4338},{ 1667, 4433},
+        { 1797, 4522},{ 1891, 4613},{ 1989, 4687},{ 2162, 4776}
+      },
+      /*Cb  qi=32  INTER*/
+      {
+        {  116,   -1},{   98,  321},{   80,  656},{   80, 1042},
+        {   96, 1438},{  110, 1827},{  122, 2205},{  133, 2570},
+        {  144, 2925},{  157, 3268},{  170, 3597},{  185, 3911},
+        {  202, 4216},{  221, 4516},{  244, 4809},{  273, 5096},
+        {  308, 5376},{  350, 5644},{  401, 5907},{  459, 6160},
+        {  520, 6401},{  592, 6630},{  676, 6837},{  758, 7050}
+      }
+    },
+    {
+      /*Cr  qi=32  INTRA*/
+      {
+        {   12,    7},{   91,  386},{  152,  773},{  201, 1202},
+        {  261, 1603},{  347, 1942},{  447, 2223},{  540, 2460},
+        {  626, 2684},{  711, 2901},{  801, 3115},{  887, 3312},
+        {  969, 3480},{ 1068, 3633},{ 1176, 3779},{ 1283, 3885},
+        { 1392, 3969},{ 1485, 4090},{ 1573, 4206},{ 1686, 4274},
+        { 1813, 4354},{ 1911, 4459},{ 2004, 4563},{ 2162, 4590}
+      },
+      /*Cr  qi=32  INTER*/
+      {
+        {  129,    5},{   98,  334},{   75,  673},{   84, 1055},
+        {  101, 1448},{  113, 1832},{  121, 2206},{  129, 2577},
+        {  140, 2937},{  151, 3282},{  163, 3614},{  179, 3932},
+        {  198, 4240},{  221, 4542},{  252, 4830},{  290, 5102},
+        {  329, 5364},{  373, 5618},{  420, 5864},{  468, 6105},
+        {  513, 6351},{  564, 6587},{  624, 6810},{  697, 7017}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=33  INTRA*/
+      {
+        {  115,   36},{  388, 1338},{  791, 2289},{ 1258, 2899},
+        { 1732, 3352},{ 2220, 3760},{ 2730, 4117},{ 3244, 4415},
+        { 3751, 4662},{ 4261, 4858},{ 4766, 5012},{ 5249, 5094},
+        { 5719, 5141},{ 6159, 5225},{ 6597, 5333},{ 7044, 5416},
+        { 7474, 5472},{ 7893, 5531},{ 8268, 5570},{ 8591, 5580},
+        { 8931, 5578},{ 9283, 5579},{ 9634, 5582},{10067, 5560}
+      },
+      /*Y'  qi=33  INTER*/
+      {
+        {   65,  -14},{  102, 1345},{  190, 2736},{  294, 3999},
+        {  411, 5146},{  597, 6192},{  934, 7045},{ 1488, 7622},
+        { 2281, 7895},{ 3213, 7937},{ 4108, 7871},{ 4883, 7784},
+        { 5556, 7709},{ 6150, 7643},{ 6685, 7585},{ 7176, 7539},
+        { 7620, 7502},{ 8034, 7466},{ 8427, 7435},{ 8793, 7409},
+        { 9136, 7386},{ 9446, 7364},{ 9743, 7339},{10025, 7303}
+      }
+    },
+    {
+      /*Cb  qi=33  INTRA*/
+      {
+        {    5,    3},{   92,  369},{  159,  746},{  203, 1163},
+        {  263, 1564},{  353, 1911},{  458, 2204},{  557, 2460},
+        {  650, 2697},{  744, 2913},{  836, 3110},{  934, 3292},
+        { 1036, 3454},{ 1125, 3616},{ 1204, 3781},{ 1298, 3932},
+        { 1410, 4058},{ 1507, 4170},{ 1606, 4265},{ 1725, 4358},
+        { 1853, 4445},{ 1955, 4535},{ 2067, 4597},{ 2258, 4663}
+      },
+      /*Cb  qi=33  INTER*/
+      {
+        {  109,   37},{   94,  343},{   81,  662},{   85, 1042},
+        {  102, 1436},{  116, 1823},{  128, 2195},{  141, 2554},
+        {  154, 2906},{  167, 3246},{  183, 3570},{  202, 3881},
+        {  220, 4185},{  241, 4482},{  268, 4772},{  302, 5053},
+        {  341, 5328},{  388, 5592},{  446, 5846},{  507, 6096},
+        {  581, 6328},{  670, 6534},{  762, 6731},{  842, 6922}
+      }
+    },
+    {
+      /*Cr  qi=33  INTRA*/
+      {
+        {   11,    7},{   93,  387},{  158,  774},{  211, 1197},
+        {  278, 1589},{  372, 1917},{  475, 2191},{  569, 2429},
+        {  658, 2655},{  744, 2868},{  835, 3083},{  926, 3271},
+        { 1010, 3430},{ 1110, 3586},{ 1224, 3724},{ 1336, 3826},
+        { 1449, 3908},{ 1547, 4021},{ 1636, 4136},{ 1751, 4200},
+        { 1886, 4277},{ 1977, 4384},{ 2070, 4474},{ 2232, 4510}
+      },
+      /*Cr  qi=33  INTER*/
+      {
+        {   77,    9},{   90,  347},{   80,  674},{   91, 1053},
+        {  107, 1444},{  119, 1825},{  127, 2196},{  137, 2563},
+        {  149, 2919},{  161, 3259},{  176, 3588},{  194, 3905},
+        {  217, 4209},{  246, 4504},{  280, 4786},{  320, 5055},
+        {  364, 5316},{  409, 5565},{  460, 5804},{  517, 6039},
+        {  578, 6264},{  640, 6489},{  701, 6721},{  772, 6948}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=34  INTRA*/
+      {
+        {  124,   40},{  401, 1333},{  823, 2262},{ 1318, 2842},
+        { 1823, 3265},{ 2339, 3650},{ 2872, 3991},{ 3405, 4274},
+        { 3926, 4513},{ 4448, 4704},{ 4961, 4845},{ 5450, 4921},
+        { 5925, 4971},{ 6372, 5053},{ 6813, 5160},{ 7264, 5242},
+        { 7704, 5291},{ 8124, 5346},{ 8500, 5382},{ 8831, 5384},
+        { 9178, 5380},{ 9525, 5387},{ 9869, 5389},{10310, 5356}
+      },
+      /*Y'  qi=34  INTER*/
+      {
+        {   64,  -17},{  101, 1344},{  190, 2730},{  299, 3981},
+        {  430, 5110},{  648, 6127},{ 1036, 6933},{ 1664, 7445},
+        { 2535, 7652},{ 3504, 7653},{ 4402, 7572},{ 5173, 7479},
+        { 5843, 7400},{ 6441, 7334},{ 6976, 7280},{ 7464, 7231},
+        { 7910, 7189},{ 8332, 7157},{ 8730, 7125},{ 9091, 7103},
+        { 9422, 7086},{ 9753, 7061},{10067, 7036},{10316, 7029}
+      }
+    },
+    {
+      /*Cb  qi=34  INTRA*/
+      {
+        {    5,    3},{   91,  369},{  158,  746},{  204, 1162},
+        {  266, 1561},{  358, 1903},{  466, 2189},{  570, 2439},
+        {  665, 2671},{  765, 2880},{  864, 3069},{  970, 3238},
+        { 1079, 3392},{ 1174, 3545},{ 1265, 3693},{ 1360, 3841},
+        { 1471, 3968},{ 1572, 4083},{ 1675, 4181},{ 1804, 4255},
+        { 1939, 4332},{ 2048, 4411},{ 2155, 4484},{ 2339, 4584}
+      },
+      /*Cb  qi=34  INTER*/
+      {
+        {   99,   44},{   92,  345},{   82,  661},{   86, 1043},
+        {  101, 1436},{  116, 1821},{  128, 2191},{  140, 2549},
+        {  154, 2898},{  168, 3235},{  185, 3556},{  203, 3865},
+        {  224, 4166},{  248, 4457},{  278, 4741},{  315, 5021},
+        {  361, 5289},{  416, 5546},{  483, 5792},{  559, 6025},
+        {  651, 6237},{  752, 6432},{  849, 6626},{  967, 6790}
+      }
+    },
+    {
+      /*Cr  qi=34  INTRA*/
+      {
+        {   11,    7},{   93,  387},{  158,  773},{  212, 1195},
+        {  282, 1584},{  378, 1909},{  483, 2179},{  578, 2414},
+        {  671, 2633},{  766, 2837},{  866, 3038},{  960, 3223},
+        { 1049, 3376},{ 1158, 3520},{ 1285, 3644},{ 1400, 3740},
+        { 1505, 3828},{ 1616, 3928},{ 1713, 4030},{ 1820, 4104},
+        { 1957, 4185},{ 2063, 4280},{ 2160, 4355},{ 2320, 4341}
+      },
+      /*Cr  qi=34  INTER*/
+      {
+        {   78,   11},{   89,  347},{   79,  674},{   90, 1053},
+        {  106, 1444},{  117, 1823},{  127, 2192},{  137, 2558},
+        {  149, 2912},{  163, 3249},{  178, 3574},{  197, 3888},
+        {  222, 4189},{  252, 4481},{  293, 4755},{  341, 5013},
+        {  386, 5268},{  436, 5512},{  498, 5743},{  563, 5970},
+        {  622, 6200},{  694, 6415},{  776, 6622},{  871, 6818}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=35  INTRA*/
+      {
+        {  116,   51},{  433, 1312},{  881, 2221},{ 1406, 2771},
+        { 1948, 3156},{ 2511, 3501},{ 3085, 3811},{ 3654, 4066},
+        { 4212, 4273},{ 4763, 4444},{ 5298, 4572},{ 5799, 4638},
+        { 6285, 4678},{ 6747, 4746},{ 7203, 4838},{ 7673, 4905},
+        { 8124, 4950},{ 8552, 5003},{ 8938, 5027},{ 9275, 5026},
+        { 9628, 5019},{ 9981, 5024},{10331, 5030},{10795, 5000}
+      },
+      /*Y'  qi=35  INTER*/
+      {
+        {   71,  -10},{  108, 1348},{  203, 2710},{  325, 3938},
+        {  485, 5040},{  766, 6000},{ 1267, 6706},{ 2048, 7089},
+        { 3037, 7191},{ 4032, 7146},{ 4903, 7061},{ 5648, 6977},
+        { 6301, 6912},{ 6884, 6857},{ 7413, 6812},{ 7898, 6775},
+        { 8342, 6739},{ 8764, 6710},{ 9160, 6688},{ 9519, 6668},
+        { 9859, 6646},{10190, 6625},{10492, 6612},{10755, 6595}
+      }
+    },
+    {
+      /*Cb  qi=35  INTRA*/
+      {
+        {    6,    3},{   95,  369},{  164,  746},{  214, 1156},
+        {  287, 1542},{  390, 1869},{  504, 2143},{  611, 2388},
+        {  712, 2613},{  822, 2811},{  937, 2987},{ 1055, 3147},
+        { 1174, 3285},{ 1286, 3420},{ 1386, 3560},{ 1488, 3698},
+        { 1604, 3814},{ 1714, 3916},{ 1825, 4008},{ 1958, 4088},
+        { 2101, 4159},{ 2224, 4226},{ 2339, 4292},{ 2538, 4383}
+      },
+      /*Cb  qi=35  INTER*/
+      {
+        {   98,   41},{   90,  348},{   86,  665},{   92, 1042},
+        {  108, 1432},{  122, 1812},{  136, 2175},{  151, 2528},
+        {  165, 2872},{  182, 3202},{  202, 3516},{  225, 3819},
+        {  251, 4112},{  281, 4398},{  320, 4675},{  367, 4944},
+        {  421, 5204},{  493, 5450},{  579, 5679},{  672, 5892},
+        {  785, 6082},{  906, 6258},{ 1026, 6432},{ 1153, 6592}
+      }
+    },
+    {
+      /*Cr  qi=35  INTRA*/
+      {
+        {   12,    7},{   98,  388},{  166,  773},{  226, 1187},
+        {  306, 1563},{  411, 1874},{  524, 2134},{  622, 2365},
+        {  721, 2577},{  826, 2768},{  947, 2946},{ 1066, 3106},
+        { 1163, 3250},{ 1274, 3395},{ 1417, 3508},{ 1539, 3590},
+        { 1639, 3671},{ 1754, 3765},{ 1865, 3855},{ 1979, 3921},
+        { 2127, 3998},{ 2249, 4085},{ 2346, 4172},{ 2473, 4210}
+      },
+      /*Cr  qi=35  INTER*/
+      {
+        {   86,   12},{   94,  354},{   85,  677},{   96, 1052},
+        {  113, 1439},{  125, 1811},{  135, 2177},{  147, 2537},
+        {  160, 2884},{  177, 3215},{  195, 3535},{  219, 3842},
+        {  252, 4133},{  292, 4413},{  339, 4680},{  396, 4928},
+        {  455, 5169},{  514, 5408},{  588, 5626},{  672, 5835},
+        {  750, 6051},{  837, 6257},{  943, 6442},{ 1073, 6595}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=36  INTRA*/
+      {
+        {  116,   52},{  432, 1312},{  881, 2215},{ 1407, 2759},
+        { 1948, 3140},{ 2511, 3484},{ 3090, 3789},{ 3672, 4036},
+        { 4243, 4236},{ 4803, 4397},{ 5346, 4517},{ 5856, 4581},
+        { 6350, 4614},{ 6821, 4675},{ 7286, 4763},{ 7754, 4832},
+        { 8201, 4875},{ 8631, 4922},{ 9015, 4948},{ 9351, 4945},
+        { 9706, 4941},{10061, 4948},{10408, 4949},{10878, 4923}
+      },
+      /*Y'  qi=36  INTER*/
+      {
+        {   63,  -16},{  114, 1332},{  216, 2690},{  343, 3914},
+        {  515, 5009},{  829, 5939},{ 1399, 6586},{ 2263, 6901},
+        { 3290, 6967},{ 4272, 6920},{ 5115, 6847},{ 5839, 6779},
+        { 6478, 6726},{ 7051, 6685},{ 7571, 6649},{ 8050, 6614},
+        { 8495, 6587},{ 8908, 6567},{ 9298, 6550},{ 9673, 6530},
+        {10005, 6512},{10324, 6499},{10640, 6483},{10936, 6487}
+      }
+    },
+    {
+      /*Cb  qi=36  INTRA*/
+      {
+        {    6,    3},{   98,  370},{  170,  746},{  225, 1150},
+        {  306, 1527},{  416, 1845},{  534, 2116},{  642, 2363},
+        {  743, 2591},{  851, 2794},{  964, 2972},{ 1081, 3133},
+        { 1198, 3275},{ 1311, 3410},{ 1411, 3547},{ 1519, 3680},
+        { 1642, 3789},{ 1750, 3892},{ 1860, 3982},{ 1998, 4054},
+        { 2141, 4129},{ 2256, 4204},{ 2372, 4278},{ 2567, 4356}
+      },
+      /*Cb  qi=36  INTER*/
+      {
+        {  107,   30},{   96,  346},{   88,  667},{  100, 1039},
+        {  115, 1426},{  128, 1804},{  142, 2164},{  158, 2512},
+        {  176, 2851},{  195, 3178},{  218, 3491},{  243, 3791},
+        {  270, 4084},{  307, 4365},{  348, 4638},{  397, 4908},
+        {  464, 5157},{  545, 5392},{  635, 5620},{  734, 5831},
+        {  854, 6015},{  993, 6170},{ 1124, 6327},{ 1234, 6502}
+      }
+    },
+    {
+      /*Cr  qi=36  INTRA*/
+      {
+        {   12,    7},{  102,  388},{  172,  773},{  239, 1182},
+        {  328, 1546},{  439, 1848},{  554, 2106},{  651, 2341},
+        {  747, 2561},{  850, 2757},{  972, 2934},{ 1086, 3097},
+        { 1182, 3245},{ 1302, 3382},{ 1447, 3491},{ 1572, 3567},
+        { 1677, 3641},{ 1793, 3733},{ 1899, 3828},{ 2013, 3894},
+        { 2163, 3967},{ 2283, 4059},{ 2387, 4142},{ 2559, 4145}
+      },
+      /*Cr  qi=36  INTER*/
+      {
+        {   98,  -10},{   96,  347},{   89,  676},{  102, 1048},
+        {  118, 1433},{  130, 1804},{  141, 2167},{  154, 2523},
+        {  171, 2866},{  190, 3194},{  212, 3508},{  240, 3809},
+        {  276, 4099},{  320, 4377},{  372, 4638},{  428, 4887},
+        {  492, 5122},{  560, 5353},{  638, 5572},{  725, 5779},
+        {  814, 5985},{  902, 6192},{ 1013, 6377},{ 1155, 6527}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=37  INTRA*/
+      {
+        {  109,   58},{  445, 1302},{  927, 2177},{ 1489, 2689},
+        { 2053, 3052},{ 2632, 3387},{ 3230, 3683},{ 3830, 3922},
+        { 4417, 4114},{ 4992, 4266},{ 5546, 4375},{ 6067, 4430},
+        { 6571, 4459},{ 7046, 4516},{ 7513, 4599},{ 7991, 4663},
+        { 8445, 4706},{ 8883, 4749},{ 9273, 4771},{ 9612, 4770},
+        { 9970, 4765},{10325, 4773},{10672, 4778},{11106, 4758}
+      },
+      /*Y'  qi=37  INTER*/
+      {
+        {   56,  -14},{  114, 1333},{  218, 2683},{  354, 3894},
+        {  550, 4966},{  916, 5854},{ 1569, 6437},{ 2520, 6685},
+        { 3596, 6704},{ 4585, 6635},{ 5424, 6556},{ 6147, 6489},
+        { 6787, 6437},{ 7358, 6395},{ 7876, 6358},{ 8361, 6325},
+        { 8807, 6294},{ 9229, 6271},{ 9631, 6253},{10002, 6238},
+        {10356, 6228},{10678, 6212},{10975, 6197},{11274, 6185}
+      }
+    },
+    {
+      /*Cb  qi=37  INTRA*/
+      {
+        {    6,    3},{   99,  370},{  171,  746},{  227, 1149},
+        {  309, 1522},{  421, 1836},{  541, 2104},{  652, 2347},
+        {  757, 2572},{  871, 2768},{  989, 2936},{ 1111, 3087},
+        { 1238, 3223},{ 1357, 3352},{ 1465, 3486},{ 1576, 3612},
+        { 1709, 3705},{ 1828, 3801},{ 1937, 3895},{ 2076, 3967},
+        { 2220, 4035},{ 2345, 4104},{ 2466, 4173},{ 2680, 4265}
+      },
+      /*Cb  qi=37  INTER*/
+      {
+        {  111,   27},{   97,  344},{   87,  667},{   99, 1038},
+        {  115, 1425},{  128, 1802},{  143, 2160},{  159, 2506},
+        {  176, 2843},{  198, 3167},{  220, 3477},{  247, 3774},
+        {  280, 4061},{  321, 4338},{  368, 4608},{  427, 4867},
+        {  501, 5109},{  595, 5332},{  701, 5544},{  818, 5738},
+        {  956, 5905},{ 1105, 6066},{ 1248, 6217},{ 1381, 6353}
+      }
+    },
+    {
+      /*Cr  qi=37  INTRA*/
+      {
+        {   12,    7},{  102,  388},{  173,  773},{  242, 1180},
+        {  331, 1541},{  444, 1839},{  562, 2095},{  662, 2326},
+        {  763, 2540},{  871, 2728},{ 1003, 2892},{ 1130, 3045},
+        { 1230, 3188},{ 1350, 3321},{ 1503, 3418},{ 1634, 3492},
+        { 1737, 3568},{ 1856, 3653},{ 1970, 3744},{ 2091, 3802},
+        { 2247, 3871},{ 2371, 3962},{ 2477, 4041},{ 2655, 4052}
+      },
+      /*Cr  qi=37  INTER*/
+      {
+        {   89,   -9},{   97,  347},{   88,  677},{  102, 1048},
+        {  118, 1432},{  130, 1802},{  141, 2163},{  154, 2517},
+        {  172, 2857},{  192, 3181},{  216, 3494},{  246, 3793},
+        {  286, 4074},{  337, 4343},{  395, 4600},{  464, 4837},
+        {  534, 5066},{  608, 5289},{  694, 5501},{  788, 5704},
+        {  893, 5901},{ 1010, 6088},{ 1151, 6249},{ 1331, 6374}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=38  INTRA*/
+      {
+        {  107,   65},{  476, 1286},{  968, 2148},{ 1548, 2641},
+        { 2141, 2979},{ 2757, 3289},{ 3390, 3564},{ 4020, 3784},
+        { 4632, 3957},{ 5224, 4097},{ 5794, 4201},{ 6326, 4250},
+        { 6828, 4274},{ 7309, 4322},{ 7790, 4401},{ 8271, 4463},
+        { 8729, 4498},{ 9165, 4540},{ 9552, 4566},{ 9901, 4560},
+        {10266, 4552},{10617, 4563},{10964, 4572},{11393, 4567}
+      },
+      /*Y'  qi=38  INTER*/
+      {
+        {   57,  -13},{  118, 1332},{  233, 2665},{  386, 3856},
+        {  620, 4899},{ 1070, 5722},{ 1849, 6211},{ 2898, 6384},
+        { 3989, 6376},{ 4947, 6311},{ 5754, 6249},{ 6454, 6199},
+        { 7077, 6161},{ 7640, 6132},{ 8159, 6101},{ 8639, 6076},
+        { 9081, 6054},{ 9502, 6037},{ 9900, 6027},{10274, 6012},
+        {10621, 5999},{10938, 5991},{11237, 5977},{11557, 5966}
+      }
+    },
+    {
+      /*Cb  qi=38  INTRA*/
+      {
+        {    8,    3},{  104,  370},{  179,  744},{  243, 1139},
+        {  338, 1498},{  458, 1801},{  584, 2060},{  700, 2297},
+        {  812, 2514},{  935, 2699},{ 1061, 2858},{ 1189, 3007},
+        { 1321, 3141},{ 1446, 3266},{ 1563, 3388},{ 1684, 3512},
+        { 1816, 3614},{ 1942, 3702},{ 2055, 3793},{ 2201, 3857},
+        { 2357, 3923},{ 2477, 3994},{ 2593, 4061},{ 2768, 4178}
+      },
+      /*Cb  qi=38  INTER*/
+      {
+        {  118,   24},{  102,  342},{   91,  663},{  101, 1040},
+        {  116, 1427},{  131, 1799},{  147, 2152},{  168, 2491},
+        {  191, 2822},{  215, 3139},{  244, 3441},{  276, 3731},
+        {  316, 4013},{  363, 4286},{  423, 4546},{  495, 4795},
+        {  584, 5028},{  691, 5242},{  814, 5439},{  959, 5608},
+        { 1119, 5759},{ 1277, 5906},{ 1449, 6035},{ 1655, 6144}
+      }
+    },
+    {
+      /*Cr  qi=38  INTRA*/
+      {
+        {   12,    6},{  106,  387},{  182,  771},{  261, 1168},
+        {  364, 1514},{  483, 1802},{  603, 2053},{  707, 2282},
+        {  817, 2489},{  933, 2670},{ 1074, 2825},{ 1210, 2967},
+        { 1320, 3104},{ 1444, 3229},{ 1599, 3324},{ 1735, 3396},
+        { 1846, 3464},{ 1971, 3547},{ 2086, 3646},{ 2206, 3711},
+        { 2366, 3773},{ 2499, 3859},{ 2603, 3945},{ 2766, 3952}
+      },
+      /*Cr  qi=38  INTER*/
+      {
+        {   86,   -9},{   91,  352},{   85,  680},{  102, 1053},
+        {  119, 1435},{  132, 1799},{  146, 2153},{  162, 2501},
+        {  183, 2835},{  209, 3154},{  240, 3458},{  278, 3751},
+        {  327, 4025},{  388, 4284},{  455, 4532},{  529, 4766},
+        {  616, 4980},{  711, 5188},{  815, 5386},{  920, 5583},
+        { 1042, 5770},{ 1186, 5936},{ 1348, 6080},{ 1542, 6196}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=39  INTRA*/
+      {
+        {  103,   66},{  479, 1283},{  998, 2125},{ 1610, 2591},
+        { 2223, 2913},{ 2855, 3214},{ 3501, 3482},{ 4146, 3698},
+        { 4772, 3868},{ 5376, 3999},{ 5956, 4095},{ 6496, 4140},
+        { 7008, 4162},{ 7499, 4209},{ 7987, 4282},{ 8478, 4338},
+        { 8947, 4374},{ 9385, 4417},{ 9783, 4437},{10143, 4433},
+        {10504, 4424},{10866, 4435},{11225, 4444},{11665, 4430}
+      },
+      /*Y'  qi=39  INTER*/
+      {
+        {   56,    2},{  118, 1332},{  235, 2660},{  395, 3843},
+        {  653, 4867},{ 1153, 5652},{ 2003, 6089},{ 3113, 6214},
+        { 4228, 6178},{ 5189, 6102},{ 6002, 6031},{ 6707, 5976},
+        { 7336, 5936},{ 7901, 5900},{ 8424, 5870},{ 8915, 5844},
+        { 9361, 5822},{ 9784, 5807},{10187, 5794},{10571, 5778},
+        {10931, 5763},{11264, 5751},{11582, 5742},{11916, 5730}
+      }
+    },
+    {
+      /*Cb  qi=39  INTRA*/
+      {
+        {    8,    3},{  104,  370},{  179,  744},{  244, 1138},
+        {  340, 1496},{  461, 1796},{  588, 2053},{  705, 2288},
+        {  820, 2503},{  945, 2684},{ 1073, 2840},{ 1210, 2981},
+        { 1352, 3106},{ 1480, 3225},{ 1603, 3342},{ 1728, 3464},
+        { 1865, 3559},{ 1990, 3645},{ 2106, 3734},{ 2258, 3796},
+        { 2413, 3856},{ 2540, 3920},{ 2667, 3986},{ 2887, 4060}
+      },
+      /*Cb  qi=39  INTER*/
+      {
+        {  119,   19},{  103,  340},{   90,  664},{  100, 1040},
+        {  115, 1426},{  131, 1797},{  148, 2148},{  169, 2486},
+        {  192, 2816},{  217, 3131},{  247, 3432},{  282, 3721},
+        {  324, 3999},{  374, 4268},{  435, 4526},{  520, 4766},
+        {  621, 4990},{  738, 5194},{  878, 5376},{ 1035, 5543},
+        { 1202, 5686},{ 1374, 5819},{ 1545, 5950},{ 1729, 6064}
+      }
+    },
+    {
+      /*Cr  qi=39  INTRA*/
+      {
+        {   12,    6},{  106,  387},{  182,  771},{  262, 1167},
+        {  365, 1512},{  486, 1798},{  608, 2047},{  713, 2274},
+        {  824, 2479},{  945, 2655},{ 1091, 2804},{ 1231, 2941},
+        { 1346, 3073},{ 1475, 3194},{ 1633, 3282},{ 1778, 3345},
+        { 1891, 3414},{ 2013, 3501},{ 2138, 3584},{ 2266, 3640},
+        { 2428, 3701},{ 2568, 3782},{ 2674, 3863},{ 2816, 3894}
+      },
+      /*Cr  qi=39  INTER*/
+      {
+        {   88,   -7},{   92,  352},{   85,  680},{  102, 1053},
+        {  119, 1434},{  132, 1797},{  146, 2151},{  163, 2498},
+        {  185, 2830},{  211, 3147},{  243, 3451},{  285, 3735},
+        {  337, 4005},{  401, 4260},{  477, 4499},{  565, 4721},
+        {  655, 4937},{  749, 5148},{  858, 5344},{  979, 5529},
+        { 1110, 5710},{ 1264, 5871},{ 1460, 5990},{ 1677, 6086}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=40  INTRA*/
+      {
+        {   98,   71},{  491, 1274},{ 1023, 2103},{ 1641, 2559},
+        { 2257, 2877},{ 2898, 3171},{ 3566, 3429},{ 4233, 3629},
+        { 4881, 3784},{ 5499, 3906},{ 6088, 3997},{ 6631, 4040},
+        { 7145, 4060},{ 7640, 4107},{ 8128, 4178},{ 8618, 4233},
+        { 9077, 4267},{ 9514, 4304},{ 9919, 4324},{10277, 4317},
+        {10635, 4312},{10985, 4324},{11338, 4331},{11792, 4334}
+      },
+      /*Y'  qi=40  INTER*/
+      {
+        {   63,  -26},{  125, 1331},{  256, 2640},{  439, 3801},
+        {  757, 4782},{ 1391, 5474},{ 2399, 5805},{ 3582, 5870},
+        { 4678, 5824},{ 5600, 5763},{ 6386, 5710},{ 7076, 5667},
+        { 7693, 5637},{ 8252, 5610},{ 8775, 5586},{ 9255, 5571},
+        { 9694, 5556},{10115, 5541},{10530, 5530},{10903, 5522},
+        {11242, 5515},{11596, 5501},{11904, 5482},{12205, 5475}
+      }
+    },
+    {
+      /*Cb  qi=40  INTRA*/
+      {
+        {    8,    3},{  108,  371},{  189,  743},{  265, 1128},
+        {  371, 1475},{  499, 1767},{  628, 2022},{  746, 2256},
+        {  864, 2467},{  991, 2647},{ 1124, 2801},{ 1270, 2933},
+        { 1412, 3054},{ 1547, 3165},{ 1677, 3277},{ 1804, 3393},
+        { 1946, 3483},{ 2078, 3569},{ 2201, 3651},{ 2352, 3711},
+        { 2513, 3766},{ 2643, 3826},{ 2775, 3880},{ 3025, 3919}
+      },
+      /*Cb  qi=40  INTER*/
+      {
+        {  114,   35},{  104,  349},{   96,  667},{  106, 1040},
+        {  121, 1423},{  138, 1789},{  158, 2132},{  184, 2464},
+        {  212, 2787},{  242, 3095},{  279, 3389},{  321, 3671},
+        {  374, 3941},{  438, 4199},{  517, 4446},{  617, 4673},
+        {  740, 4881},{  891, 5064},{ 1058, 5225},{ 1239, 5372},
+        { 1441, 5499},{ 1638, 5610},{ 1840, 5719},{ 2076, 5814}
+      }
+    },
+    {
+      /*Cr  qi=40  INTRA*/
+      {
+        {   14,    7},{  114,  389},{  193,  771},{  283, 1156},
+        {  399, 1488},{  523, 1768},{  643, 2018},{  752, 2245},
+        {  865, 2450},{  984, 2626},{ 1139, 2763},{ 1290, 2887},
+        { 1413, 3014},{ 1550, 3128},{ 1711, 3211},{ 1865, 3268},
+        { 1981, 3334},{ 2103, 3415},{ 2237, 3486},{ 2365, 3543},
+        { 2529, 3610},{ 2666, 3700},{ 2775, 3779},{ 2929, 3803}
+      },
+      /*Cr  qi=40  INTER*/
+      {
+        {   89,   -8},{   95,  353},{   90,  681},{  107, 1053},
+        {  124, 1430},{  139, 1787},{  156, 2136},{  177, 2477},
+        {  203, 2803},{  237, 3112},{  276, 3406},{  329, 3683},
+        {  395, 3942},{  475, 4182},{  567, 4407},{  665, 4624},
+        {  767, 4834},{  879, 5032},{ 1011, 5213},{ 1169, 5375},
+        { 1348, 5525},{ 1547, 5654},{ 1785, 5743},{ 2066, 5787}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=41  INTRA*/
+      {
+        {   98,   71},{  495, 1272},{ 1040, 2090},{ 1675, 2533},
+        { 2302, 2842},{ 2953, 3132},{ 3631, 3381},{ 4309, 3574},
+        { 4966, 3726},{ 5593, 3846},{ 6189, 3934},{ 6738, 3972},
+        { 7256, 3991},{ 7754, 4036},{ 8250, 4099},{ 8747, 4150},
+        { 9207, 4185},{ 9650, 4222},{10057, 4242},{10411, 4237},
+        {10771, 4230},{11127, 4244},{11486, 4254},{11933, 4252}
+      },
+      /*Y'  qi=41  INTER*/
+      {
+        {   65,  -25},{  125, 1331},{  260, 2633},{  457, 3782},
+        {  807, 4740},{ 1499, 5397},{ 2562, 5693},{ 3766, 5743},
+        { 4859, 5695},{ 5776, 5638},{ 6556, 5590},{ 7243, 5554},
+        { 7859, 5529},{ 8417, 5506},{ 8935, 5486},{ 9419, 5473},
+        { 9869, 5460},{10296, 5446},{10711, 5436},{11089, 5430},
+        {11445, 5421},{11802, 5412},{12129, 5404},{12465, 5393}
+      }
+    },
+    {
+      /*Cb  qi=41  INTRA*/
+      {
+        {    8,    3},{  108,  371},{  189,  743},{  267, 1126},
+        {  374, 1471},{  504, 1760},{  635, 2011},{  758, 2241},
+        {  881, 2447},{ 1013, 2621},{ 1147, 2773},{ 1293, 2906},
+        { 1441, 3023},{ 1580, 3131},{ 1712, 3243},{ 1844, 3360},
+        { 1985, 3451},{ 2114, 3532},{ 2240, 3613},{ 2390, 3680},
+        { 2550, 3740},{ 2687, 3800},{ 2825, 3862},{ 3052, 3944}
+      },
+      /*Cb  qi=41  INTER*/
+      {
+        {  104,   39},{  100,  350},{   95,  667},{  105, 1040},
+        {  121, 1422},{  137, 1787},{  159, 2129},{  185, 2459},
+        {  216, 2778},{  249, 3083},{  287, 3374},{  335, 3653},
+        {  393, 3920},{  462, 4175},{  549, 4414},{  660, 4636},
+        {  791, 4839},{  952, 5014},{ 1135, 5166},{ 1337, 5297},
+        { 1552, 5411},{ 1752, 5530},{ 1972, 5634},{ 2224, 5724}
+      }
+    },
+    {
+      /*Cr  qi=41  INTRA*/
+      {
+        {   15,    7},{  115,  389},{  193,  770},{  284, 1154},
+        {  401, 1484},{  528, 1761},{  652, 2005},{  764, 2228},
+        {  882, 2427},{ 1008, 2599},{ 1167, 2734},{ 1320, 2859},
+        { 1443, 2990},{ 1580, 3103},{ 1743, 3181},{ 1894, 3241},
+        { 2012, 3309},{ 2141, 3385},{ 2272, 3459},{ 2398, 3519},
+        { 2566, 3584},{ 2707, 3680},{ 2816, 3762},{ 2991, 3770}
+      },
+      /*Cr  qi=41  INTER*/
+      {
+        {   92,   -9},{   98,  354},{   90,  682},{  107, 1052},
+        {  124, 1429},{  139, 1786},{  156, 2132},{  178, 2471},
+        {  207, 2794},{  241, 3100},{  285, 3391},{  345, 3662},
+        {  417, 3915},{  503, 4151},{  600, 4375},{  703, 4589},
+        {  815, 4791},{  942, 4981},{ 1088, 5155},{ 1250, 5316},
+        { 1432, 5462},{ 1653, 5575},{ 1930, 5639},{ 2250, 5655}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=42  INTRA*/
+      {
+        {  109,   75},{  534, 1257},{ 1114, 2047},{ 1793, 2456},
+        { 2461, 2735},{ 3157, 2994},{ 3879, 3221},{ 4595, 3396},
+        { 5282, 3531},{ 5931, 3638},{ 6546, 3714},{ 7105, 3749},
+        { 7633, 3766},{ 8147, 3803},{ 8652, 3865},{ 9148, 3915},
+        { 9613, 3946},{10075, 3976},{10489, 3997},{10835, 3994},
+        {11195, 3985},{11553, 3997},{11909, 4004},{12369, 3990}
+      },
+      /*Y'  qi=42  INTER*/
+      {
+        {   69,  -23},{  134, 1332},{  287, 2611},{  521, 3730},
+        {  970, 4624},{ 1827, 5176},{ 3028, 5382},{ 4262, 5389},
+        { 5325, 5338},{ 6214, 5291},{ 6976, 5255},{ 7651, 5228},
+        { 8260, 5206},{ 8821, 5190},{ 9343, 5177},{ 9823, 5165},
+        {10273, 5152},{10709, 5143},{11121, 5136},{11502, 5129},
+        {11857, 5125},{12193, 5115},{12520, 5107},{12802, 5097}
+      }
+    },
+    {
+      /*Cb  qi=42  INTRA*/
+      {
+        {    9,    3},{  113,  371},{  199,  743},{  279, 1123},
+        {  390, 1462},{  525, 1743},{  662, 1986},{  789, 2208},
+        {  916, 2406},{ 1057, 2571},{ 1204, 2712},{ 1362, 2835},
+        { 1524, 2943},{ 1676, 3040},{ 1815, 3145},{ 1959, 3249},
+        { 2117, 3325},{ 2249, 3406},{ 2377, 3488},{ 2537, 3547},
+        { 2706, 3597},{ 2854, 3646},{ 2999, 3705},{ 3236, 3759}
+      },
+      /*Cb  qi=42  INTER*/
+      {
+        {  114,   44},{  107,  353},{  101,  670},{  111, 1041},
+        {  129, 1418},{  148, 1775},{  174, 2110},{  208, 2432},
+        {  244, 2746},{  283, 3046},{  330, 3330},{  388, 3602},
+        {  460, 3858},{  546, 4101},{  655, 4326},{  793, 4530},
+        {  966, 4703},{ 1165, 4851},{ 1388, 4980},{ 1630, 5088},
+        { 1869, 5189},{ 2122, 5268},{ 2403, 5328},{ 2667, 5417}
+      }
+    },
+    {
+      /*Cr  qi=42  INTRA*/
+      {
+        {   15,    7},{  120,  390},{  202,  771},{  298, 1150},
+        {  421, 1473},{  553, 1743},{  681, 1982},{  796, 2199},
+        {  923, 2388},{ 1062, 2547},{ 1225, 2678},{ 1392, 2792},
+        { 1531, 2907},{ 1682, 3007},{ 1856, 3074},{ 2009, 3134},
+        { 2138, 3192},{ 2274, 3257},{ 2407, 3333},{ 2536, 3393},
+        { 2711, 3455},{ 2875, 3531},{ 3000, 3598},{ 3186, 3599}
+      },
+      /*Cr  qi=42  INTER*/
+      {
+        {   87,   -4},{   95,  358},{   97,  683},{  113, 1052},
+        {  131, 1423},{  148, 1774},{  170, 2116},{  198, 2448},
+        {  234, 2762},{  276, 3062},{  331, 3343},{  404, 3603},
+        {  494, 3844},{  598, 4067},{  715, 4276},{  842, 4471},
+        {  977, 4661},{ 1128, 4840},{ 1311, 4991},{ 1516, 5127},
+        { 1759, 5233},{ 2050, 5300},{ 2377, 5323},{ 2710, 5304}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=43  INTRA*/
+      {
+        {   99,   79},{  557, 1244},{ 1175, 2016},{ 1882, 2408},
+        { 2570, 2677},{ 3288, 2926},{ 4030, 3141},{ 4760, 3307},
+        { 5458, 3435},{ 6115, 3537},{ 6743, 3608},{ 7312, 3636},
+        { 7841, 3652},{ 8357, 3687},{ 8870, 3742},{ 9376, 3788},
+        { 9850, 3821},{10315, 3853},{10734, 3873},{11084, 3870},
+        {11442, 3862},{11800, 3874},{12160, 3879},{12618, 3876}
+      },
+      /*Y'  qi=43  INTER*/
+      {
+        {   69,  -22},{  134, 1331},{  294, 2601},{  551, 3703},
+        { 1056, 4563},{ 2003, 5061},{ 3276, 5215},{ 4534, 5194},
+        { 5599, 5133},{ 6488, 5083},{ 7257, 5044},{ 7938, 5014},
+        { 8556, 4992},{ 9124, 4975},{ 9648, 4960},{10138, 4948},
+        {10594, 4939},{11039, 4926},{11462, 4919},{11847, 4912},
+        {12216, 4904},{12570, 4896},{12883, 4889},{13189, 4879}
+      }
+    },
+    {
+      /*Cb  qi=43  INTRA*/
+      {
+        {    9,    3},{  114,  371},{  202,  740},{  294, 1110},
+        {  417, 1440},{  558, 1716},{  700, 1956},{  833, 2172},
+        {  966, 2365},{ 1116, 2524},{ 1269, 2661},{ 1431, 2781},
+        { 1599, 2885},{ 1756, 2980},{ 1902, 3082},{ 2051, 3185},
+        { 2209, 3261},{ 2337, 3342},{ 2464, 3420},{ 2633, 3475},
+        { 2809, 3525},{ 2948, 3579},{ 3094, 3633},{ 3347, 3678}
+      },
+      /*Cb  qi=43  INTER*/
+      {
+        {  111,   44},{  106,  353},{  102,  670},{  112, 1040},
+        {  128, 1416},{  148, 1771},{  176, 2104},{  211, 2424},
+        {  250, 2734},{  293, 3030},{  347, 3309},{  411, 3575},
+        {  490, 3828},{  589, 4064},{  716, 4278},{  869, 4472},
+        { 1050, 4640},{ 1264, 4781},{ 1512, 4895},{ 1775, 4991},
+        { 2042, 5069},{ 2310, 5141},{ 2593, 5207},{ 2912, 5239}
+      }
+    },
+    {
+      /*Cr  qi=43  INTRA*/
+      {
+        {   15,    7},{  121,  390},{  208,  767},{  315, 1135},
+        {  449, 1449},{  586, 1715},{  718, 1950},{  843, 2158},
+        {  977, 2342},{ 1120, 2501},{ 1290, 2632},{ 1466, 2739},
+        { 1613, 2845},{ 1763, 2945},{ 1937, 3015},{ 2093, 3070},
+        { 2225, 3126},{ 2366, 3194},{ 2501, 3267},{ 2634, 3324},
+        { 2815, 3385},{ 2964, 3466},{ 3087, 3538},{ 3263, 3555}
+      },
+      /*Cr  qi=43  INTER*/
+      {
+        {   84,   -4},{   93,  358},{   95,  683},{  113, 1052},
+        {  131, 1421},{  148, 1770},{  171, 2110},{  201, 2439},
+        {  240, 2750},{  287, 3046},{  348, 3322},{  429, 3576},
+        {  527, 3811},{  641, 4029},{  767, 4230},{  904, 4422},
+        { 1053, 4603},{ 1225, 4765},{ 1433, 4903},{ 1661, 5030},
+        { 1928, 5121},{ 2252, 5160},{ 2604, 5164},{ 2979, 5125}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=44  INTRA*/
+      {
+        {  103,   80},{  560, 1244},{ 1183, 2009},{ 1891, 2391},
+        { 2586, 2649},{ 3324, 2884},{ 4093, 3089},{ 4850, 3243},
+        { 5575, 3358},{ 6252, 3452},{ 6886, 3518},{ 7459, 3546},
+        { 7993, 3562},{ 8515, 3594},{ 9030, 3645},{ 9534, 3691},
+        {10004, 3723},{10469, 3750},{10887, 3765},{11236, 3766},
+        {11596, 3762},{11960, 3775},{12317, 3784},{12766, 3789}
+      },
+      /*Y'  qi=44  INTER*/
+      {
+        {   77,  -24},{  145, 1332},{  332, 2580},{  642, 3649},
+        { 1270, 4438},{ 2360, 4860},{ 3685, 4982},{ 4910, 4966},
+        { 5929, 4928},{ 6785, 4900},{ 7529, 4880},{ 8198, 4863},
+        { 8804, 4850},{ 9361, 4842},{ 9882, 4836},{10371, 4830},
+        {10827, 4822},{11262, 4816},{11672, 4811},{12052, 4807},
+        {12431, 4806},{12780, 4798},{13095, 4792},{13401, 4791}
+      }
+    },
+    {
+      /*Cb  qi=44  INTRA*/
+      {
+        {    9,    2},{  122,  371},{  214,  741},{  307, 1109},
+        {  433, 1432},{  576, 1704},{  718, 1939},{  855, 2152},
+        {  991, 2340},{ 1141, 2497},{ 1298, 2632},{ 1463, 2749},
+        { 1636, 2851},{ 1796, 2944},{ 1947, 3041},{ 2101, 3140},
+        { 2260, 3219},{ 2392, 3297},{ 2527, 3366},{ 2693, 3424},
+        { 2872, 3477},{ 3025, 3525},{ 3175, 3584},{ 3451, 3626}
+      },
+      /*Cb  qi=44  INTER*/
+      {
+        {  111,   14},{  110,  339},{  109,  671},{  120, 1040},
+        {  139, 1410},{  162, 1758},{  197, 2084},{  243, 2397},
+        {  291, 2702},{  342, 2992},{  405, 3265},{  484, 3521},
+        {  584, 3760},{  705, 3983},{  855, 4185},{ 1048, 4356},
+        { 1274, 4500},{ 1531, 4617},{ 1816, 4707},{ 2111, 4783},
+        { 2409, 4846},{ 2720, 4901},{ 3044, 4957},{ 3391, 4985}
+      }
+    },
+    {
+      /*Cr  qi=44  INTRA*/
+      {
+        {   17,    7},{  128,  392},{  219,  770},{  329, 1135},
+        {  465, 1442},{  601, 1703},{  734, 1935},{  862, 2142},
+        {  998, 2325},{ 1147, 2482},{ 1321, 2606},{ 1496, 2710},
+        { 1649, 2813},{ 1809, 2908},{ 1984, 2977},{ 2143, 3032},
+        { 2279, 3087},{ 2423, 3152},{ 2559, 3225},{ 2684, 3288},
+        { 2866, 3351},{ 3025, 3426},{ 3161, 3492},{ 3372, 3500}
+      },
+      /*Cr  qi=44  INTER*/
+      {
+        {   89,    0},{  101,  352},{  104,  683},{  121, 1051},
+        {  141, 1414},{  163, 1757},{  192, 2092},{  231, 2415},
+        {  278, 2720},{  336, 3007},{  412, 3273},{  510, 3516},
+        {  633, 3733},{  769, 3936},{  914, 4130},{ 1076, 4307},
+        { 1256, 4472},{ 1469, 4617},{ 1723, 4732},{ 2012, 4822},
+        { 2347, 4871},{ 2716, 4875},{ 3082, 4866},{ 3422, 4826}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=45  INTRA*/
+      {
+        {  119,   78},{  610, 1226},{ 1271, 1965},{ 2026, 2319},
+        { 2768, 2550},{ 3556, 2757},{ 4369, 2938},{ 5157, 3076},
+        { 5901, 3182},{ 6598, 3268},{ 7253, 3326},{ 7844, 3343},
+        { 8392, 3356},{ 8922, 3386},{ 9453, 3433},{ 9973, 3474},
+        {10457, 3503},{10929, 3530},{11351, 3543},{11709, 3541},
+        {12068, 3537},{12434, 3547},{12805, 3555},{13268, 3563}
+      },
+      /*Y'  qi=45  INTER*/
+      {
+        {   77,  -20},{  146, 1330},{  342, 2566},{  699, 3604},
+        { 1439, 4332},{ 2669, 4672},{ 4075, 4727},{ 5318, 4679},
+        { 6345, 4630},{ 7209, 4595},{ 7963, 4570},{ 8644, 4551},
+        { 9262, 4535},{ 9831, 4525},{10370, 4515},{10872, 4506},
+        {11334, 4500},{11783, 4492},{12219, 4489},{12617, 4483},
+        {12995, 4477},{13350, 4472},{13674, 4466},{13968, 4468}
+      }
+    },
+    {
+      /*Cb  qi=45  INTRA*/
+      {
+        {    9,    2},{  122,  370},{  219,  735},{  324, 1096},
+        {  465, 1414},{  619, 1679},{  771, 1905},{  920, 2103},
+        { 1070, 2276},{ 1236, 2419},{ 1410, 2539},{ 1595, 2644},
+        { 1784, 2736},{ 1949, 2831},{ 2104, 2931},{ 2275, 3021},
+        { 2443, 3092},{ 2586, 3166},{ 2735, 3234},{ 2904, 3288},
+        { 3093, 3338},{ 3262, 3382},{ 3419, 3427},{ 3708, 3456}
+      },
+      /*Cb  qi=45  INTER*/
+      {
+        {  103,    0},{  109,  339},{  109,  670},{  119, 1039},
+        {  137, 1408},{  162, 1754},{  199, 2076},{  248, 2386},
+        {  301, 2684},{  360, 2967},{  433, 3234},{  525, 3481},
+        {  640, 3713},{  780, 3924},{  956, 4110},{ 1176, 4266},
+        { 1438, 4390},{ 1736, 4481},{ 2057, 4553},{ 2385, 4613},
+        { 2718, 4656},{ 3056, 4698},{ 3416, 4733},{ 3799, 4755}
+      }
+    },
+    {
+      /*Cr  qi=45  INTRA*/
+      {
+        {   16,    7},{  128,  391},{  225,  763},{  350, 1120},
+        {  500, 1420},{  649, 1673},{  792, 1893},{  929, 2089},
+        { 1084, 2257},{ 1250, 2401},{ 1440, 2518},{ 1633, 2614},
+        { 1799, 2708},{ 1968, 2798},{ 2151, 2863},{ 2314, 2914},
+        { 2453, 2968},{ 2611, 3025},{ 2759, 3095},{ 2887, 3160},
+        { 3082, 3210},{ 3259, 3278},{ 3403, 3342},{ 3593, 3354}
+      },
+      /*Cr  qi=45  INTER*/
+      {
+        {   92,    0},{  101,  352},{  103,  682},{  120, 1049},
+        {  140, 1412},{  163, 1752},{  193, 2083},{  234, 2402},
+        {  287, 2702},{  353, 2983},{  442, 3240},{  557, 3471},
+        {  694, 3680},{  846, 3873},{ 1014, 4056},{ 1200, 4224},
+        { 1414, 4369},{ 1664, 4495},{ 1946, 4595},{ 2278, 4654},
+        { 2654, 4673},{ 3047, 4658},{ 3438, 4627},{ 3825, 4585}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=46  INTRA*/
+      {
+        {  119,   78},{  610, 1227},{ 1277, 1960},{ 2043, 2309},
+        { 2805, 2529},{ 3618, 2719},{ 4452, 2887},{ 5257, 3016},
+        { 6017, 3115},{ 6727, 3195},{ 7392, 3248},{ 7984, 3267},
+        { 8528, 3281},{ 9059, 3310},{ 9593, 3354},{10119, 3395},
+        {10599, 3425},{11064, 3450},{11493, 3464},{11850, 3466},
+        {12207, 3462},{12578, 3471},{12948, 3480},{13407, 3487}
+      },
+      /*Y'  qi=46  INTER*/
+      {
+        {   74,  -14},{  149, 1326},{  382, 2538},{  807, 3541},
+        { 1670, 4211},{ 3000, 4499},{ 4416, 4533},{ 5628, 4490},
+        { 6628, 4453},{ 7479, 4425},{ 8228, 4406},{ 8902, 4393},
+        { 9521, 4380},{10090, 4371},{10623, 4364},{11124, 4356},
+        {11586, 4351},{12043, 4344},{12476, 4341},{12863, 4340},
+        {13244, 4337},{13610, 4329},{13936, 4324},{14246, 4329}
+      }
+    },
+    {
+      /*Cb  qi=46  INTRA*/
+      {
+        {   11,    2},{  132,  371},{  234,  737},{  340, 1094},
+        {  481, 1405},{  637, 1667},{  791, 1891},{  944, 2084},
+        { 1099, 2253},{ 1268, 2392},{ 1444, 2507},{ 1633, 2610},
+        { 1825, 2700},{ 1990, 2794},{ 2147, 2895},{ 2321, 2984},
+        { 2493, 3053},{ 2640, 3126},{ 2787, 3198},{ 2954, 3253},
+        { 3146, 3297},{ 3313, 3344},{ 3473, 3393},{ 3757, 3434}
+      },
+      /*Cb  qi=46  INTER*/
+      {
+        {   97,    0},{  109,  339},{  108,  669},{  120, 1035},
+        {  142, 1398},{  173, 1737},{  221, 2052},{  281, 2353},
+        {  345, 2646},{  415, 2924},{  504, 3183},{  616, 3421},
+        {  749, 3643},{  914, 3842},{ 1123, 4012},{ 1379, 4150},
+        { 1685, 4250},{ 2014, 4327},{ 2366, 4382},{ 2731, 4426},
+        { 3083, 4470},{ 3445, 4490},{ 3805, 4511},{ 4146, 4539}
+      }
+    },
+    {
+      /*Cr  qi=46  INTRA*/
+      {
+        {   19,    7},{  137,  393},{  237,  765},{  364, 1116},
+        {  516, 1411},{  665, 1662},{  809, 1880},{  951, 2072},
+        { 1109, 2236},{ 1278, 2378},{ 1474, 2491},{ 1669, 2584},
+        { 1835, 2678},{ 2014, 2766},{ 2203, 2828},{ 2366, 2880},
+        { 2506, 2933},{ 2661, 2988},{ 2810, 3053},{ 2941, 3116},
+        { 3131, 3175},{ 3310, 3243},{ 3461, 3303},{ 3656, 3321}
+      },
+      /*Cr  qi=46  INTER*/
+      {
+        {   91,    1},{  103,  351},{  104,  681},{  121, 1046},
+        {  144, 1401},{  173, 1736},{  213, 2060},{  265, 2373},
+        {  330, 2666},{  410, 2938},{  517, 3185},{  655, 3404},
+        {  815, 3601},{  989, 3784},{ 1183, 3951},{ 1400, 4104},
+        { 1649, 4241},{ 1933, 4352},{ 2261, 4427},{ 2646, 4458},
+        { 3057, 4446},{ 3453, 4418},{ 3820, 4385},{ 4171, 4352}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=47  INTRA*/
+      {
+        {  117,   83},{  670, 1205},{ 1408, 1904},{ 2239, 2219},
+        { 3049, 2414},{ 3905, 2584},{ 4775, 2734},{ 5610, 2852},
+        { 6393, 2944},{ 7121, 3017},{ 7804, 3066},{ 8407, 3081},
+        { 8957, 3093},{ 9498, 3119},{10043, 3160},{10582, 3199},
+        {11083, 3226},{11561, 3250},{11993, 3263},{12352, 3264},
+        {12711, 3259},{13092, 3266},{13463, 3271},{13918, 3275}
+      },
+      /*Y'  qi=47  INTER*/
+      {
+        {   74,  -11},{  148, 1325},{  404, 2518},{  910, 3478},
+        { 1916, 4080},{ 3369, 4298},{ 4823, 4292},{ 6035, 4238},
+        { 7037, 4197},{ 7894, 4168},{ 8650, 4146},{ 9337, 4129},
+        { 9968, 4116},{10549, 4105},{11096, 4096},{11605, 4089},
+        {12081, 4083},{12547, 4076},{12990, 4070},{13399, 4070},
+        {13776, 4065},{14133, 4059},{14486, 4057},{14842, 4053}
+      }
+    },
+    {
+      /*Cb  qi=47  INTRA*/
+      {
+        {   11,    2},{  133,  370},{  242,  731},{  367, 1077},
+        {  524, 1378},{  692, 1630},{  860, 1844},{ 1028, 2024},
+        { 1203, 2178},{ 1393, 2305},{ 1582, 2413},{ 1787, 2507},
+        { 1992, 2590},{ 2175, 2676},{ 2351, 2767},{ 2534, 2851},
+        { 2707, 2923},{ 2862, 2994},{ 3021, 3060},{ 3193, 3111},
+        { 3396, 3147},{ 3573, 3184},{ 3752, 3220},{ 4038, 3255}
+      },
+      /*Cb  qi=47  INTER*/
+      {
+        {  101,    0},{  107,  339},{  108,  667},{  120, 1033},
+        {  142, 1394},{  175, 1729},{  227, 2040},{  295, 2335},
+        {  369, 2619},{  452, 2888},{  556, 3138},{  686, 3368},
+        {  850, 3574},{ 1050, 3758},{ 1299, 3910},{ 1605, 4024},
+        { 1950, 4104},{ 2317, 4163},{ 2689, 4210},{ 3077, 4239},
+        { 3466, 4258},{ 3840, 4278},{ 4205, 4298},{ 4515, 4340}
+      }
+    },
+    {
+      /*Cr  qi=47  INTRA*/
+      {
+        {   19,    7},{  138,  392},{  248,  758},{  396, 1094},
+        {  563, 1378},{  723, 1621},{  881, 1829},{ 1037, 2011},
+        { 1214, 2165},{ 1410, 2290},{ 1623, 2393},{ 1834, 2480},
+        { 2016, 2564},{ 2203, 2647},{ 2405, 2707},{ 2569, 2757},
+        { 2709, 2810},{ 2871, 2860},{ 3027, 2924},{ 3178, 2980},
+        { 3375, 3034},{ 3563, 3097},{ 3724, 3151},{ 3952, 3153}
+      },
+      /*Cr  qi=47  INTER*/
+      {
+        {   91,    1},{  100,  351},{  102,  681},{  120, 1043},
+        {  144, 1397},{  175, 1729},{  219, 2049},{  277, 2356},
+        {  353, 2640},{  451, 2902},{  579, 3136},{  739, 3342},
+        {  926, 3525},{ 1125, 3698},{ 1343, 3859},{ 1595, 3998},
+        { 1881, 4113},{ 2208, 4205},{ 2589, 4253},{ 3014, 4250},
+        { 3444, 4220},{ 3838, 4183},{ 4196, 4147},{ 4521, 4116}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=48  INTRA*/
+      {
+        {  107,   87},{  681, 1200},{ 1456, 1883},{ 2306, 2193},
+        { 3122, 2386},{ 3984, 2548},{ 4862, 2693},{ 5704, 2808},
+        { 6495, 2899},{ 7232, 2970},{ 7915, 3018},{ 8524, 3034},
+        { 9085, 3043},{ 9635, 3068},{10192, 3108},{10735, 3145},
+        {11237, 3171},{11719, 3194},{12153, 3207},{12516, 3206},
+        {12888, 3202},{13266, 3210},{13637, 3218},{14101, 3219}
+      },
+      /*Y'  qi=48  INTER*/
+      {
+        {   83,  -18},{  147, 1328},{  398, 2519},{  923, 3468},
+        { 1979, 4047},{ 3472, 4246},{ 4936, 4232},{ 6148, 4178},
+        { 7150, 4139},{ 8007, 4111},{ 8765, 4091},{ 9458, 4076},
+        {10090, 4063},{10676, 4054},{11226, 4045},{11742, 4038},
+        {12223, 4033},{12686, 4029},{13127, 4022},{13527, 4015},
+        {13915, 4012},{14277, 4007},{14619, 4004},{14966, 4001}
+      }
+    },
+    {
+      /*Cb  qi=48  INTRA*/
+      {
+        {   11,    2},{  134,  369},{  245,  730},{  373, 1075},
+        {  531, 1374},{  698, 1625},{  865, 1839},{ 1033, 2019},
+        { 1207, 2173},{ 1397, 2300},{ 1588, 2408},{ 1795, 2501},
+        { 2003, 2581},{ 2187, 2666},{ 2362, 2757},{ 2548, 2841},
+        { 2719, 2912},{ 2876, 2983},{ 3034, 3047},{ 3209, 3097},
+        { 3409, 3137},{ 3589, 3178},{ 3762, 3216},{ 4004, 3252}
+      },
+      /*Cb  qi=48  INTER*/
+      {
+        {  113,   26},{  112,  344},{  111,  668},{  120, 1032},
+        {  141, 1392},{  173, 1727},{  224, 2036},{  290, 2330},
+        {  363, 2612},{  447, 2880},{  551, 3130},{  685, 3358},
+        {  852, 3563},{ 1061, 3742},{ 1332, 3884},{ 1654, 3993},
+        { 2011, 4068},{ 2394, 4120},{ 2782, 4160},{ 3172, 4186},
+        { 3557, 4209},{ 3932, 4228},{ 4306, 4237},{ 4675, 4236}
+      }
+    },
+    {
+      /*Cr  qi=48  INTRA*/
+      {
+        {   18,    7},{  139,  389},{  252,  755},{  404, 1090},
+        {  573, 1372},{  732, 1615},{  889, 1823},{ 1045, 2005},
+        { 1222, 2159},{ 1417, 2285},{ 1631, 2387},{ 1843, 2474},
+        { 2027, 2558},{ 2212, 2639},{ 2413, 2697},{ 2578, 2746},
+        { 2720, 2798},{ 2887, 2852},{ 3040, 2913},{ 3181, 2970},
+        { 3381, 3024},{ 3581, 3081},{ 3743, 3130},{ 3948, 3133}
+      },
+      /*Cr  qi=48  INTER*/
+      {
+        {   89,    0},{  106,  352},{  105,  682},{  120, 1044},
+        {  144, 1395},{  174, 1724},{  215, 2044},{  270, 2350},
+        {  343, 2635},{  441, 2895},{  571, 3129},{  735, 3334},
+        {  926, 3518},{ 1139, 3684},{ 1371, 3836},{ 1628, 3977},
+        { 1933, 4089},{ 2279, 4164},{ 2672, 4204},{ 3105, 4205},
+        { 3533, 4176},{ 3931, 4135},{ 4290, 4089},{ 4624, 4057}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=49  INTRA*/
+      {
+        {  120,   85},{  706, 1194},{ 1485, 1875},{ 2348, 2187},
+        { 3190, 2372},{ 4076, 2521},{ 4967, 2658},{ 5819, 2771},
+        { 6611, 2861},{ 7345, 2936},{ 8026, 2990},{ 8626, 3013},
+        { 9182, 3030},{ 9723, 3059},{10266, 3100},{10802, 3143},
+        {11293, 3179},{11768, 3206},{12201, 3221},{12556, 3225},
+        {12914, 3226},{13281, 3237},{13639, 3247},{14089, 3257}
+      },
+      /*Y'  qi=49  INTER*/
+      {
+        {   72,  -11},{  155, 1320},{  458, 2485},{ 1090, 3386},
+        { 2284, 3907},{ 3835, 4075},{ 5272, 4064},{ 6449, 4026},
+        { 7426, 4003},{ 8267, 3987},{ 9017, 3976},{ 9698, 3967},
+        {10328, 3962},{10913, 3959},{11452, 3954},{11961, 3950},
+        {12442, 3947},{12904, 3946},{13347, 3945},{13749, 3943},
+        {14123, 3941},{14490, 3941},{14826, 3939},{15153, 3937}
+      }
+    },
+    {
+      /*Cb  qi=49  INTRA*/
+      {
+        {   11,    2},{  145,  369},{  262,  729},{  393, 1070},
+        {  557, 1363},{  731, 1607},{  907, 1811},{ 1085, 1983},
+        { 1268, 2130},{ 1465, 2251},{ 1658, 2359},{ 1868, 2454},
+        { 2079, 2534},{ 2264, 2621},{ 2440, 2717},{ 2625, 2802},
+        { 2792, 2878},{ 2945, 2954},{ 3106, 3021},{ 3277, 3075},
+        { 3466, 3119},{ 3638, 3170},{ 3824, 3213},{ 4100, 3243}
+      },
+      /*Cb  qi=49  INTER*/
+      {
+        {   98,   -6},{  113,  343},{  110,  669},{  122, 1029},
+        {  149, 1380},{  192, 1706},{  258, 2007},{  340, 2293},
+        {  426, 2569},{  525, 2831},{  653, 3071},{  814, 3287},
+        { 1013, 3478},{ 1262, 3637},{ 1575, 3761},{ 1936, 3851},
+        { 2328, 3910},{ 2741, 3949},{ 3163, 3970},{ 3559, 3994},
+        { 3936, 4025},{ 4300, 4050},{ 4655, 4060},{ 4962, 4062}
+      }
+    },
+    {
+      /*Cr  qi=49  INTRA*/
+      {
+        {   19,    7},{  151,  389},{  270,  753},{  427, 1084},
+        {  602, 1360},{  767, 1595},{  933, 1794},{ 1098, 1968},
+        { 1285, 2115},{ 1489, 2237},{ 1699, 2342},{ 1912, 2435},
+        { 2101, 2519},{ 2288, 2601},{ 2486, 2663},{ 2651, 2715},
+        { 2799, 2769},{ 2958, 2825},{ 3106, 2890},{ 3257, 2948},
+        { 3452, 3007},{ 3634, 3075},{ 3786, 3136},{ 3959, 3164}
+      },
+      /*Cr  qi=49  INTER*/
+      {
+        {   85,    1},{  103,  352},{  104,  681},{  121, 1039},
+        {  152, 1382},{  195, 1702},{  248, 2015},{  316, 2316},
+        {  403, 2595},{  520, 2847},{  676, 3068},{  870, 3258},
+        { 1091, 3429},{ 1329, 3585},{ 1597, 3725},{ 1894, 3849},
+        { 2242, 3940},{ 2656, 3984},{ 3098, 3992},{ 3531, 3981},
+        { 3936, 3950},{ 4304, 3915},{ 4646, 3879},{ 4915, 3861}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=50  INTRA*/
+      {
+        {  122,   89},{  798, 1170},{ 1682, 1812},{ 2613, 2096},
+        { 3501, 2260},{ 4430, 2388},{ 5352, 2510},{ 6228, 2613},
+        { 7043, 2698},{ 7793, 2770},{ 8486, 2823},{ 9092, 2846},
+        { 9652, 2865},{10210, 2895},{10773, 2936},{11315, 2979},
+        {11817, 3014},{12297, 3041},{12734, 3057},{13097, 3064},
+        {13443, 3067},{13813, 3078},{14190, 3088},{14646, 3103}
+      },
+      /*Y'  qi=50  INTER*/
+      {
+        {   73,  -11},{  154, 1318},{  501, 2457},{ 1281, 3291},
+        { 2685, 3719},{ 4356, 3810},{ 5811, 3769},{ 6988, 3726},
+        { 7976, 3700},{ 8835, 3682},{ 9606, 3669},{10307, 3659},
+        {10953, 3652},{11556, 3645},{12115, 3643},{12641, 3640},
+        {13138, 3636},{13613, 3634},{14068, 3629},{14488, 3627},
+        {14876, 3625},{15237, 3621},{15585, 3623},{15922, 3629}
+      }
+    },
+    {
+      /*Cb  qi=50  INTRA*/
+      {
+        {   11,    2},{  148,  368},{  278,  724},{  431, 1052},
+        {  613, 1334},{  806, 1567},{ 1004, 1756},{ 1203, 1915},
+        { 1405, 2051},{ 1621, 2163},{ 1833, 2262},{ 2059, 2347},
+        { 2280, 2424},{ 2476, 2512},{ 2670, 2598},{ 2864, 2679},
+        { 3037, 2754},{ 3201, 2826},{ 3376, 2887},{ 3562, 2936},
+        { 3756, 2976},{ 3932, 3022},{ 4117, 3065},{ 4385, 3094}
+      },
+      /*Cb  qi=50  INTER*/
+      {
+        {   92,   -3},{  112,  343},{  109,  669},{  121, 1027},
+        {  149, 1375},{  196, 1697},{  270, 1992},{  366, 2267},
+        {  471, 2532},{  594, 2782},{  747, 3011},{  942, 3212},
+        { 1189, 3384},{ 1497, 3521},{ 1875, 3613},{ 2297, 3673},
+        { 2739, 3710},{ 3195, 3725},{ 3644, 3737},{ 4057, 3751},
+        { 4445, 3763},{ 4841, 3769},{ 5211, 3779},{ 5568, 3769}
+      }
+    },
+    {
+      /*Cr  qi=50  INTRA*/
+      {
+        {   19,    7},{  155,  388},{  290,  744},{  474, 1060},
+        {  666, 1324},{  847, 1549},{ 1033, 1737},{ 1219, 1898},
+        { 1428, 2034},{ 1653, 2147},{ 1885, 2245},{ 2115, 2329},
+        { 2316, 2410},{ 2517, 2486},{ 2730, 2539},{ 2901, 2586},
+        { 3042, 2638},{ 3199, 2693},{ 3366, 2755},{ 3534, 2805},
+        { 3738, 2858},{ 3934, 2916},{ 4079, 2975},{ 4257, 2992}
+      },
+      /*Cr  qi=50  INTER*/
+      {
+        {   87,    1},{  102,  353},{  103,  680},{  121, 1036},
+        {  153, 1377},{  199, 1694},{  260, 1999},{  339, 2291},
+        {  446, 2559},{  590, 2797},{  780, 3003},{ 1010, 3176},
+        { 1267, 3331},{ 1547, 3474},{ 1874, 3594},{ 2245, 3688},
+        { 2666, 3742},{ 3130, 3758},{ 3594, 3748},{ 4028, 3711},
+        { 4415, 3674},{ 4771, 3641},{ 5122, 3605},{ 5482, 3569}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=51  INTRA*/
+      {
+        {  115,   93},{  819, 1164},{ 1739, 1806},{ 2695, 2101},
+        { 3612, 2257},{ 4552, 2374},{ 5479, 2490},{ 6352, 2593},
+        { 7158, 2683},{ 7898, 2761},{ 8580, 2823},{ 9177, 2854},
+        { 9728, 2880},{10268, 2917},{10816, 2966},{11350, 3016},
+        {11834, 3058},{12311, 3089},{12741, 3109},{13092, 3119},
+        {13434, 3126},{13791, 3142},{14156, 3155},{14590, 3171}
+      },
+      /*Y'  qi=51  INTER*/
+      {
+        {   58,    0},{  171, 1307},{  610, 2407},{ 1563, 3175},
+        { 3116, 3545},{ 4789, 3624},{ 6185, 3602},{ 7320, 3583},
+        { 8282, 3574},{ 9124, 3569},{ 9878, 3567},{10569, 3565},
+        {11207, 3563},{11801, 3564},{12359, 3566},{12884, 3567},
+        {13373, 3568},{13841, 3567},{14289, 3566},{14699, 3568},
+        {15086, 3568},{15446, 3566},{15788, 3564},{16103, 3568}
+      }
+    },
+    {
+      /*Cb  qi=51  INTRA*/
+      {
+        {   14,    3},{  161,  369},{  297,  722},{  454, 1047},
+        {  639, 1325},{  833, 1554},{ 1033, 1742},{ 1236, 1897},
+        { 1440, 2032},{ 1653, 2148},{ 1860, 2253},{ 2077, 2347},
+        { 2288, 2432},{ 2476, 2525},{ 2661, 2621},{ 2841, 2714},
+        { 3010, 2797},{ 3170, 2876},{ 3333, 2945},{ 3510, 3000},
+        { 3696, 3054},{ 3865, 3114},{ 4046, 3164},{ 4317, 3200}
+      },
+      /*Cb  qi=51  INTER*/
+      {
+        {   88,  -11},{  109,  341},{  109,  668},{  126, 1019},
+        {  168, 1358},{  233, 1670},{  329, 1955},{  451, 2219},
+        {  584, 2472},{  736, 2711},{  931, 2923},{ 1179, 3104},
+        { 1480, 3254},{ 1846, 3368},{ 2265, 3448},{ 2714, 3501},
+        { 3180, 3524},{ 3638, 3529},{ 4074, 3543},{ 4485, 3560},
+        { 4868, 3571},{ 5238, 3581},{ 5597, 3594},{ 5953, 3591}
+      }
+    },
+    {
+      /*Cr  qi=51  INTRA*/
+      {
+        {   24,    7},{  168,  388},{  309,  742},{  496, 1054},
+        {  688, 1316},{  873, 1538},{ 1063, 1723},{ 1252, 1882},
+        { 1460, 2018},{ 1682, 2134},{ 1907, 2238},{ 2125, 2332},
+        { 2317, 2422},{ 2507, 2510},{ 2705, 2575},{ 2869, 2630},
+        { 3015, 2684},{ 3178, 2744},{ 3329, 2815},{ 3477, 2878},
+        { 3667, 2945},{ 3848, 3016},{ 3997, 3082},{ 4174, 3121}
+      },
+      /*Cr  qi=51  INTER*/
+      {
+        {   83,   -2},{  102,  351},{  102,  680},{  126, 1029},
+        {  172, 1359},{  238, 1665},{  321, 1962},{  422, 2246},
+        {  552, 2505},{  733, 2728},{  970, 2912},{ 1247, 3069},
+        { 1552, 3209},{ 1876, 3338},{ 2251, 3440},{ 2692, 3502},
+        { 3161, 3529},{ 3637, 3525},{ 4084, 3509},{ 4487, 3479},
+        { 4850, 3444},{ 5181, 3419},{ 5507, 3406},{ 5786, 3398}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=52  INTRA*/
+      {
+        {  117,   93},{  814, 1168},{ 1729, 1822},{ 2706, 2119},
+        { 3655, 2262},{ 4604, 2374},{ 5528, 2490},{ 6394, 2596},
+        { 7189, 2691},{ 7921, 2777},{ 8596, 2846},{ 9184, 2885},
+        { 9728, 2918},{10260, 2961},{10796, 3014},{11316, 3069},
+        {11793, 3115},{12267, 3150},{12692, 3172},{13037, 3185},
+        {13367, 3196},{13717, 3214},{14087, 3227},{14521, 3249}
+      },
+      /*Y'  qi=52  INTER*/
+      {
+        {   52,    0},{  169, 1308},{  668, 2382},{ 1735, 3112},
+        { 3384, 3451},{ 5077, 3519},{ 6461, 3506},{ 7587, 3496},
+        { 8545, 3494},{ 9384, 3494},{10142, 3498},{10838, 3501},
+        {11475, 3503},{12078, 3508},{12640, 3511},{13162, 3513},
+        {13654, 3517},{14130, 3521},{14576, 3522},{14980, 3523},
+        {15369, 3523},{15737, 3522},{16071, 3521},{16382, 3516}
+      }
+    },
+    {
+      /*Cb  qi=52  INTRA*/
+      {
+        {   14,    3},{  163,  369},{  299,  722},{  457, 1044},
+        {  645, 1319},{  843, 1545},{ 1050, 1728},{ 1261, 1879},
+        { 1468, 2013},{ 1678, 2132},{ 1883, 2240},{ 2093, 2338},
+        { 2301, 2428},{ 2488, 2523},{ 2667, 2619},{ 2843, 2718},
+        { 3010, 2805},{ 3163, 2887},{ 3323, 2963},{ 3490, 3028},
+        { 3665, 3087},{ 3841, 3145},{ 4011, 3197},{ 4289, 3230}
+      },
+      /*Cb  qi=52  INTER*/
+      {
+        {   98,   -7},{  109,  342},{  109,  668},{  126, 1018},
+        {  170, 1355},{  242, 1663},{  352, 1941},{  490, 2195},
+        {  642, 2439},{  823, 2666},{ 1052, 2868},{ 1333, 3039},
+        { 1670, 3178},{ 2074, 3280},{ 2524, 3348},{ 2996, 3390},
+        { 3469, 3410},{ 3923, 3420},{ 4355, 3434},{ 4771, 3451},
+        { 5166, 3468},{ 5532, 3483},{ 5885, 3499},{ 6263, 3501}
+      }
+    },
+    {
+      /*Cr  qi=52  INTRA*/
+      {
+        {   25,    7},{  170,  388},{  312,  741},{  500, 1051},
+        {  694, 1310},{  883, 1529},{ 1082, 1709},{ 1280, 1864},
+        { 1491, 1998},{ 1710, 2117},{ 1932, 2225},{ 2143, 2324},
+        { 2328, 2418},{ 2516, 2506},{ 2708, 2578},{ 2870, 2637},
+        { 3017, 2693},{ 3170, 2758},{ 3312, 2835},{ 3455, 2901},
+        { 3644, 2972},{ 3827, 3049},{ 3968, 3121},{ 4115, 3166}
+      },
+      /*Cr  qi=52  INTER*/
+      {
+        {   86,   -2},{  101,  352},{  100,  680},{  126, 1028},
+        {  175, 1356},{  247, 1657},{  341, 1948},{  458, 2224},
+        {  615, 2471},{  828, 2681},{ 1091, 2857},{ 1395, 3008},
+        { 1732, 3140},{ 2095, 3257},{ 2502, 3348},{ 2968, 3402},
+        { 3457, 3420},{ 3926, 3413},{ 4360, 3388},{ 4759, 3357},
+        { 5128, 3329},{ 5449, 3306},{ 5741, 3295},{ 6071, 3296}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=53  INTRA*/
+      {
+        {  138,   93},{  850, 1161},{ 1773, 1810},{ 2763, 2103},
+        { 3722, 2245},{ 4675, 2360},{ 5600, 2483},{ 6464, 2597},
+        { 7255, 2700},{ 7982, 2792},{ 8652, 2867},{ 9237, 2913},
+        { 9775, 2950},{10302, 2998},{10834, 3058},{11347, 3121},
+        {11826, 3169},{12299, 3207},{12713, 3235},{13054, 3250},
+        {13387, 3265},{13744, 3286},{14110, 3302},{14515, 3323}
+      },
+      /*Y'  qi=53  INTER*/
+      {
+        {   52,    2},{  169, 1308},{  680, 2377},{ 1763, 3103},
+        { 3410, 3450},{ 5094, 3531},{ 6469, 3526},{ 7590, 3525},
+        { 8547, 3530},{ 9385, 3534},{10139, 3540},{10835, 3548},
+        {11479, 3553},{12075, 3559},{12634, 3565},{13159, 3570},
+        {13650, 3573},{14124, 3576},{14575, 3580},{14993, 3583},
+        {15375, 3584},{15744, 3584},{16091, 3583},{16421, 3586}
+      }
+    },
+    {
+      /*Cb  qi=53  INTRA*/
+      {
+        {   14,    3},{  167,  367},{  317,  717},{  492, 1033},
+        {  687, 1306},{  887, 1531},{ 1095, 1715},{ 1309, 1866},
+        { 1517, 2000},{ 1729, 2119},{ 1932, 2227},{ 2146, 2325},
+        { 2358, 2414},{ 2544, 2511},{ 2724, 2611},{ 2902, 2711},
+        { 3070, 2800},{ 3227, 2878},{ 3381, 2954},{ 3548, 3021},
+        { 3724, 3077},{ 3888, 3140},{ 4065, 3196},{ 4359, 3225}
+      },
+      /*Cb  qi=53  INTER*/
+      {
+        {   93,   -8},{  110,  342},{  108,  668},{  125, 1018},
+        {  170, 1355},{  242, 1663},{  353, 1939},{  494, 2192},
+        {  651, 2433},{  838, 2658},{ 1076, 2856},{ 1368, 3022},
+        { 1716, 3158},{ 2123, 3260},{ 2575, 3330},{ 3042, 3373},
+        { 3507, 3396},{ 3962, 3413},{ 4394, 3430},{ 4797, 3452},
+        { 5169, 3476},{ 5547, 3496},{ 5914, 3510},{ 6235, 3525}
+      }
+    },
+    {
+      /*Cr  qi=53  INTRA*/
+      {
+        {   25,    7},{  175,  386},{  335,  734},{  541, 1037},
+        {  737, 1296},{  926, 1516},{ 1125, 1696},{ 1324, 1851},
+        { 1540, 1984},{ 1763, 2102},{ 1989, 2210},{ 2202, 2310},
+        { 2386, 2404},{ 2572, 2495},{ 2768, 2569},{ 2929, 2627},
+        { 3071, 2684},{ 3231, 2749},{ 3374, 2825},{ 3514, 2894},
+        { 3703, 2963},{ 3882, 3040},{ 4024, 3111},{ 4190, 3150}
+      },
+      /*Cr  qi=53  INTER*/
+      {
+        {   87,   -1},{   99,  352},{  100,  680},{  125, 1027},
+        {  175, 1355},{  249, 1657},{  343, 1946},{  462, 2220},
+        {  624, 2465},{  844, 2671},{ 1122, 2841},{ 1435, 2989},
+        { 1768, 3125},{ 2134, 3243},{ 2545, 3334},{ 3002, 3393},
+        { 3490, 3412},{ 3965, 3405},{ 4401, 3384},{ 4797, 3359},
+        { 5156, 3328},{ 5482, 3297},{ 5800, 3292},{ 6135, 3293}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=54  INTRA*/
+      {
+        {  184,   94},{  902, 1151},{ 1876, 1776},{ 2881, 2057},
+        { 3832, 2200},{ 4785, 2315},{ 5709, 2442},{ 6570, 2562},
+        { 7362, 2672},{ 8092, 2771},{ 8760, 2852},{ 9337, 2901},
+        { 9874, 2943},{10402, 2995},{10928, 3059},{11443, 3126},
+        {11926, 3178},{12396, 3220},{12805, 3251},{13139, 3266},
+        {13466, 3280},{13822, 3304},{14184, 3322},{14585, 3342}
+      },
+      /*Y'  qi=54  INTER*/
+      {
+        {   60,    5},{  169, 1308},{  683, 2375},{ 1791, 3090},
+        { 3478, 3412},{ 5184, 3470},{ 6568, 3455},{ 7697, 3446},
+        { 8659, 3446},{ 9503, 3447},{10266, 3450},{10971, 3454},
+        {11619, 3458},{12223, 3462},{12789, 3467},{13315, 3471},
+        {13811, 3475},{14291, 3479},{14743, 3479},{15148, 3481},
+        {15535, 3483},{15913, 3481},{16252, 3479},{16569, 3472}
+      }
+    },
+    {
+      /*Cb  qi=54  INTRA*/
+      {
+        {   13,    2},{  165,  367},{  318,  715},{  498, 1030},
+        {  698, 1301},{  906, 1523},{ 1121, 1703},{ 1336, 1853},
+        { 1549, 1984},{ 1765, 2100},{ 1974, 2207},{ 2192, 2306},
+        { 2402, 2396},{ 2587, 2493},{ 2773, 2591},{ 2953, 2691},
+        { 3119, 2778},{ 3277, 2858},{ 3430, 2940},{ 3603, 3004},
+        { 3788, 3059},{ 3950, 3121},{ 4128, 3173},{ 4398, 3215}
+      },
+      /*Cb  qi=54  INTER*/
+      {
+        {  100,   -3},{  109,  343},{  107,  668},{  125, 1018},
+        {  169, 1354},{  241, 1662},{  353, 1938},{  496, 2190},
+        {  655, 2431},{  843, 2655},{ 1082, 2851},{ 1381, 3015},
+        { 1739, 3146},{ 2154, 3243},{ 2610, 3310},{ 3094, 3344},
+        { 3581, 3358},{ 4034, 3371},{ 4457, 3384},{ 4867, 3399},
+        { 5255, 3413},{ 5630, 3425},{ 6003, 3440},{ 6346, 3440}
+      }
+    },
+    {
+      /*Cr  qi=54  INTRA*/
+      {
+        {   23,    7},{  174,  386},{  338,  732},{  549, 1034},
+        {  751, 1289},{  947, 1506},{ 1150, 1685},{ 1353, 1837},
+        { 1572, 1969},{ 1800, 2087},{ 2031, 2192},{ 2248, 2291},
+        { 2434, 2387},{ 2622, 2477},{ 2815, 2549},{ 2976, 2607},
+        { 3126, 2663},{ 3286, 2727},{ 3427, 2807},{ 3569, 2877},
+        { 3761, 2941},{ 3942, 3016},{ 4084, 3093},{ 4226, 3131}
+      },
+      /*Cr  qi=54  INTER*/
+      {
+        {   88,   -2},{   99,  351},{  100,  680},{  125, 1027},
+        {  175, 1354},{  248, 1656},{  343, 1945},{  463, 2219},
+        {  626, 2463},{  850, 2668},{ 1128, 2837},{ 1445, 2983},
+        { 1791, 3111},{ 2168, 3224},{ 2597, 3309},{ 3075, 3351},
+        { 3560, 3364},{ 4029, 3356},{ 4464, 3335},{ 4858, 3307},
+        { 5218, 3275},{ 5547, 3256},{ 5850, 3247},{ 6171, 3214}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=55  INTRA*/
+      {
+        {  178,   95},{  968, 1137},{ 2000, 1747},{ 3013, 2027},
+        { 3966, 2173},{ 4920, 2294},{ 5842, 2427},{ 6702, 2553},
+        { 7489, 2668},{ 8213, 2773},{ 8875, 2858},{ 9452, 2913},
+        { 9986, 2959},{10504, 3016},{11023, 3085},{11530, 3157},
+        {12011, 3213},{12480, 3257},{12882, 3291},{13214, 3310},
+        {13542, 3325},{13890, 3350},{14248, 3371},{14671, 3398}
+      },
+      /*Y'  qi=55  INTER*/
+      {
+        {   59,    5},{  170, 1307},{  725, 2358},{ 1886, 3058},
+        { 3589, 3385},{ 5284, 3459},{ 6654, 3458},{ 7771, 3461},
+        { 8727, 3470},{ 9564, 3478},{10322, 3488},{11019, 3497},
+        {11658, 3505},{12258, 3513},{12819, 3520},{13344, 3527},
+        {13840, 3533},{14314, 3537},{14755, 3541},{15161, 3544},
+        {15552, 3548},{15916, 3548},{16257, 3548},{16576, 3540}
+      }
+    },
+    {
+      /*Cb  qi=55  INTRA*/
+      {
+        {   13,    2},{  167,  366},{  322,  714},{  508, 1026},
+        {  716, 1292},{  930, 1511},{ 1148, 1690},{ 1366, 1839},
+        { 1578, 1972},{ 1793, 2090},{ 2001, 2199},{ 2217, 2300},
+        { 2427, 2393},{ 2609, 2495},{ 2784, 2600},{ 2961, 2704},
+        { 3121, 2797},{ 3268, 2884},{ 3423, 2965},{ 3590, 3032},
+        { 3764, 3096},{ 3926, 3165},{ 4101, 3223},{ 4405, 3258}
+      },
+      /*Cb  qi=55  INTER*/
+      {
+        {   90,   -4},{  109,  344},{  107,  668},{  126, 1017},
+        {  172, 1351},{  249, 1657},{  370, 1928},{  527, 2174},
+        {  702, 2407},{  909, 2624},{ 1170, 2814},{ 1493, 2970},
+        { 1869, 3097},{ 2292, 3192},{ 2752, 3258},{ 3232, 3295},
+        { 3709, 3314},{ 4156, 3335},{ 4592, 3355},{ 5004, 3373},
+        { 5377, 3389},{ 5737, 3411},{ 6092, 3432},{ 6473, 3423}
+      }
+    },
+    {
+      /*Cr  qi=55  INTRA*/
+      {
+        {   23,    7},{  175,  385},{  342,  730},{  561, 1028},
+        {  771, 1279},{  973, 1493},{ 1181, 1669},{ 1384, 1822},
+        { 1602, 1956},{ 1830, 2076},{ 2057, 2184},{ 2270, 2288},
+        { 2452, 2389},{ 2637, 2484},{ 2823, 2559},{ 2983, 2621},
+        { 3129, 2682},{ 3280, 2753},{ 3417, 2833},{ 3554, 2904},
+        { 3743, 2977},{ 3921, 3060},{ 4055, 3137},{ 4185, 3186}
+      },
+      /*Cr  qi=55  INTER*/
+      {
+        {   85,    0},{   99,  352},{  100,  679},{  126, 1025},
+        {  178, 1351},{  256, 1650},{  359, 1935},{  493, 2202},
+        {  675, 2439},{  921, 2636},{ 1220, 2799},{ 1552, 2941},
+        { 1910, 3068},{ 2303, 3177},{ 2735, 3262},{ 3206, 3311},
+        { 3689, 3333},{ 4152, 3327},{ 4588, 3299},{ 4978, 3272},
+        { 5325, 3243},{ 5651, 3221},{ 5969, 3210},{ 6218, 3185}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=56  INTRA*/
+      {
+        {  137,  104},{ 1048, 1128},{ 2147, 1760},{ 3261, 2029},
+        { 4319, 2131},{ 5310, 2234},{ 6245, 2351},{ 7101, 2464},
+        { 7886, 2572},{ 8610, 2675},{ 9270, 2762},{ 9840, 2818},
+        {10365, 2869},{10875, 2928},{11393, 2997},{11900, 3071},
+        {12371, 3128},{12834, 3172},{13233, 3208},{13562, 3228},
+        {13878, 3245},{14221, 3271},{14584, 3292},{15008, 3320}
+      },
+      /*Y'  qi=56  INTER*/
+      {
+        {   19,   21},{  207, 1292},{ 1031, 2252},{ 2553, 2846},
+        { 4463, 3085},{ 6137, 3131},{ 7441, 3151},{ 8526, 3172},
+        { 9468, 3193},{10301, 3209},{11059, 3224},{11760, 3237},
+        {12405, 3249},{13008, 3261},{13570, 3270},{14100, 3278},
+        {14597, 3284},{15074, 3289},{15524, 3297},{15929, 3302},
+        {16314, 3306},{16675, 3307},{17004, 3305},{17288, 3301}
+      }
+    },
+    {
+      /*Cb  qi=56  INTRA*/
+      {
+        {   16,    3},{  188,  367},{  353,  712},{  546, 1017},
+        {  765, 1275},{  989, 1484},{ 1221, 1653},{ 1459, 1791},
+        { 1681, 1920},{ 1893, 2046},{ 2102, 2160},{ 2323, 2257},
+        { 2534, 2347},{ 2720, 2447},{ 2902, 2549},{ 3075, 2654},
+        { 3239, 2749},{ 3392, 2835},{ 3544, 2920},{ 3712, 2988},
+        { 3882, 3052},{ 4052, 3123},{ 4227, 3181},{ 4483, 3213}
+      },
+      /*Cb  qi=56  INTER*/
+      {
+        {   92,   -1},{  111,  343},{  114,  665},{  148, 1003},
+        {  224, 1321},{  345, 1609},{  526, 1858},{  754, 2077},
+        { 1009, 2281},{ 1319, 2464},{ 1702, 2614},{ 2145, 2732},
+        { 2625, 2824},{ 3123, 2890},{ 3634, 2933},{ 4137, 2954},
+        { 4614, 2965},{ 5052, 2988},{ 5468, 3015},{ 5852, 3035},
+        { 6213, 3060},{ 6557, 3081},{ 6906, 3094},{ 7243, 3112}
+      }
+    },
+    {
+      /*Cr  qi=56  INTRA*/
+      {
+        {   28,    8},{  195,  385},{  373,  727},{  598, 1019},
+        {  816, 1263},{ 1033, 1465},{ 1260, 1630},{ 1482, 1773},
+        { 1717, 1900},{ 1949, 2018},{ 2178, 2128},{ 2393, 2233},
+        { 2570, 2338},{ 2749, 2435},{ 2937, 2514},{ 3097, 2577},
+        { 3240, 2638},{ 3398, 2709},{ 3540, 2791},{ 3673, 2865},
+        { 3869, 2938},{ 4049, 3019},{ 4179, 3095},{ 4330, 3137}
+      },
+      /*Cr  qi=56  INTER*/
+      {
+        {   83,    0},{   99,  353},{  103,  676},{  146, 1010},
+        {  232, 1320},{  355, 1601},{  512, 1866},{  713, 2109},
+        {  988, 2312},{ 1344, 2471},{ 1750, 2602},{ 2180, 2719},
+        { 2642, 2819},{ 3141, 2892},{ 3653, 2939},{ 4159, 2961},
+        { 4636, 2961},{ 5072, 2945},{ 5464, 2917},{ 5813, 2895},
+        { 6134, 2890},{ 6458, 2883},{ 6735, 2881},{ 6953, 2902}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=57  INTRA*/
+      {
+        {  170,  106},{ 1106, 1120},{ 2246, 1740},{ 3399, 1993},
+        { 4482, 2077},{ 5492, 2167},{ 6446, 2273},{ 7324, 2379},
+        { 8130, 2482},{ 8866, 2578},{ 9537, 2661},{10119, 2715},
+        {10646, 2762},{11161, 2820},{11694, 2886},{12214, 2957},
+        {12693, 3013},{13166, 3053},{13569, 3087},{13897, 3106},
+        {14224, 3122},{14568, 3148},{14931, 3167},{15390, 3192}
+      },
+      /*Y'  qi=57  INTER*/
+      {
+        {   19,   20},{  205, 1292},{ 1096, 2229},{ 2775, 2766},
+        { 4811, 2943},{ 6512, 2964},{ 7832, 2976},{ 8940, 2990},
+        { 9903, 3004},{10755, 3017},{11532, 3029},{12243, 3039},
+        {12891, 3047},{13502, 3058},{14073, 3065},{14603, 3071},
+        {15097, 3078},{15581, 3083},{16036, 3086},{16452, 3090},
+        {16855, 3093},{17222, 3094},{17552, 3092},{17851, 3098}
+      }
+    },
+    {
+      /*Cb  qi=57  INTRA*/
+      {
+        {   16,    3},{  197,  365},{  384,  704},{  603, 1001},
+        {  837, 1252},{ 1077, 1455},{ 1326, 1618},{ 1581, 1748},
+        { 1819, 1871},{ 2042, 1993},{ 2264, 2104},{ 2500, 2196},
+        { 2722, 2280},{ 2916, 2375},{ 3103, 2473},{ 3290, 2575},
+        { 3456, 2667},{ 3612, 2748},{ 3775, 2829},{ 3958, 2896},
+        { 4145, 2947},{ 4307, 3012},{ 4476, 3070},{ 4733, 3110}
+      },
+      /*Cb  qi=57  INTER*/
+      {
+        {   94,   -1},{  111,  344},{  112,  665},{  147, 1002},
+        {  227, 1319},{  353, 1604},{  543, 1849},{  785, 2062},
+        { 1066, 2257},{ 1408, 2430},{ 1827, 2568},{ 2320, 2670},
+        { 2848, 2743},{ 3386, 2791},{ 3934, 2812},{ 4453, 2820},
+        { 4929, 2830},{ 5368, 2842},{ 5787, 2856},{ 6190, 2875},
+        { 6554, 2896},{ 6895, 2913},{ 7229, 2927},{ 7572, 2932}
+      }
+    },
+    {
+      /*Cr  qi=57  INTRA*/
+      {
+        {   28,    8},{  207,  383},{  413,  716},{  661,  999},
+        {  889, 1237},{ 1123, 1433},{ 1365, 1592},{ 1603, 1731},
+        { 1853, 1852},{ 2103, 1965},{ 2345, 2072},{ 2571, 2173},
+        { 2763, 2271},{ 2949, 2364},{ 3146, 2438},{ 3315, 2497},
+        { 3459, 2552},{ 3618, 2616},{ 3767, 2697},{ 3906, 2773},
+        { 4099, 2841},{ 4281, 2916},{ 4429, 2987},{ 4569, 3030}
+      },
+      /*Cr  qi=57  INTER*/
+      {
+        {   85,    0},{   99,  352},{  102,  675},{  147, 1008},
+        {  235, 1317},{  363, 1597},{  529, 1858},{  748, 2094},
+        { 1050, 2287},{ 1439, 2436},{ 1877, 2557},{ 2352, 2660},
+        { 2869, 2740},{ 3413, 2791},{ 3962, 2815},{ 4485, 2819},
+        { 4955, 2816},{ 5382, 2800},{ 5769, 2772},{ 6107, 2748},
+        { 6443, 2740},{ 6754, 2739},{ 7029, 2737},{ 7284, 2745}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=58  INTRA*/
+      {
+        {  164,  109},{ 1198, 1111},{ 2396, 1737},{ 3606, 1978},
+        { 4727, 2048},{ 5749, 2138},{ 6708, 2243},{ 7584, 2347},
+        { 8388, 2449},{ 9122, 2549},{ 9784, 2635},{10354, 2691},
+        {10876, 2740},{11385, 2800},{11912, 2869},{12429, 2941},
+        {12902, 2997},{13375, 3040},{13779, 3075},{14103, 3096},
+        {14435, 3112},{14783, 3140},{15141, 3160},{15599, 3186}
+      },
+      /*Y'  qi=58  INTER*/
+      {
+        {   14,   23},{  210, 1290},{ 1277, 2178},{ 3118, 2677},
+        { 5207, 2834},{ 6902, 2857},{ 8218, 2878},{ 9323, 2900},
+        {10285, 2919},{11132, 2934},{11899, 2949},{12599, 2961},
+        {13235, 2971},{13835, 2982},{14394, 2991},{14917, 2997},
+        {15412, 3005},{15882, 3009},{16325, 3013},{16735, 3016},
+        {17131, 3018},{17501, 3021},{17824, 3021},{18125, 3016}
+      }
+    },
+    {
+      /*Cb  qi=58  INTRA*/
+      {
+        {   17,    3},{  200,  365},{  389,  703},{  613,  996},
+        {  853, 1243},{ 1095, 1445},{ 1349, 1604},{ 1613, 1731},
+        { 1853, 1853},{ 2074, 1978},{ 2292, 2091},{ 2526, 2184},
+        { 2750, 2266},{ 2945, 2360},{ 3134, 2458},{ 3320, 2561},
+        { 3482, 2654},{ 3641, 2737},{ 3804, 2818},{ 3985, 2881},
+        { 4168, 2935},{ 4331, 3003},{ 4499, 3060},{ 4751, 3100}
+      },
+      /*Cb  qi=58  INTER*/
+      {
+        {   94,   -1},{  112,  345},{  112,  665},{  152,  998},
+        {  247, 1307},{  406, 1580},{  644, 1810},{  938, 2007},
+        { 1271, 2189},{ 1668, 2348},{ 2151, 2470},{ 2691, 2558},
+        { 3249, 2619},{ 3798, 2659},{ 4334, 2682},{ 4849, 2692},
+        { 5314, 2700},{ 5747, 2721},{ 6167, 2742},{ 6547, 2765},
+        { 6902, 2790},{ 7251, 2804},{ 7583, 2819},{ 7924, 2833}
+      }
+    },
+    {
+      /*Cr  qi=58  INTRA*/
+      {
+        {   29,    8},{  210,  382},{  419,  714},{  671,  993},
+        {  903, 1229},{ 1141, 1422},{ 1390, 1578},{ 1635, 1713},
+        { 1889, 1833},{ 2140, 1946},{ 2379, 2055},{ 2604, 2157},
+        { 2794, 2256},{ 2977, 2349},{ 3174, 2422},{ 3339, 2482},
+        { 3483, 2537},{ 3643, 2604},{ 3790, 2684},{ 3927, 2757},
+        { 4112, 2826},{ 4294, 2900},{ 4451, 2975},{ 4600, 3011}
+      },
+      /*Cr  qi=58  INTER*/
+      {
+        {   86,    0},{   99,  352},{  103,  675},{  151, 1004},
+        {  256, 1306},{  417, 1573},{  628, 1819},{  901, 2040},
+        { 1262, 2217},{ 1705, 2353},{ 2191, 2466},{ 2713, 2556},
+        { 3268, 2622},{ 3831, 2664},{ 4374, 2682},{ 4881, 2686},
+        { 5339, 2685},{ 5747, 2668},{ 6123, 2646},{ 6465, 2630},
+        { 6783, 2618},{ 7082, 2623},{ 7366, 2632},{ 7673, 2654}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=59  INTRA*/
+      {
+        {  142,  112},{ 1259, 1100},{ 2552, 1711},{ 3815, 1933},
+        { 4955, 1987},{ 5983, 2068},{ 6949, 2165},{ 7832, 2263},
+        { 8645, 2359},{ 9392, 2454},{10066, 2536},{10643, 2589},
+        {11174, 2636},{11696, 2693},{12230, 2758},{12752, 2826},
+        {13239, 2883},{13721, 2926},{14139, 2959},{14479, 2978},
+        {14811, 2993},{15166, 3020},{15532, 3039},{16000, 3062}
+      },
+      /*Y'  qi=59  INTER*/
+      {
+        {    8,   25},{  211, 1289},{ 1394, 2144},{ 3421, 2580},
+        { 5611, 2689},{ 7316, 2701},{ 8643, 2717},{ 9762, 2734},
+        {10735, 2750},{11587, 2763},{12353, 2775},{13056, 2785},
+        {13693, 2793},{14288, 2805},{14843, 2814},{15361, 2821},
+        {15857, 2827},{16328, 2831},{16763, 2834},{17171, 2838},
+        {17568, 2840},{17941, 2842},{18285, 2843},{18586, 2839}
+      }
+    },
+    {
+      /*Cb  qi=59  INTRA*/
+      {
+        {   17,    3},{  224,  363},{  441,  696},{  689,  982},
+        {  945, 1222},{ 1204, 1416},{ 1474, 1571},{ 1751, 1695},
+        { 2001, 1816},{ 2228, 1941},{ 2453, 2055},{ 2693, 2147},
+        { 2924, 2227},{ 3125, 2321},{ 3321, 2416},{ 3510, 2520},
+        { 3676, 2616},{ 3839, 2699},{ 4008, 2778},{ 4193, 2842},
+        { 4371, 2898},{ 4535, 2965},{ 4710, 3023},{ 4921, 3068}
+      },
+      /*Cb  qi=59  INTER*/
+      {
+        {   95,   -5},{  111,  343},{  112,  664},{  157,  995},
+        {  258, 1302},{  429, 1569},{  691, 1790},{ 1017, 1977},
+        { 1387, 2148},{ 1832, 2294},{ 2368, 2401},{ 2961, 2472},
+        { 3553, 2518},{ 4133, 2545},{ 4688, 2557},{ 5198, 2563},
+        { 5663, 2574},{ 6100, 2590},{ 6511, 2608},{ 6898, 2621},
+        { 7274, 2634},{ 7631, 2655},{ 7984, 2669},{ 8361, 2669}
+      }
+    },
+    {
+      /*Cr  qi=59  INTRA*/
+      {
+        {   31,    8},{  240,  379},{  480,  706},{  748,  978},
+        {  993, 1208},{ 1250, 1394},{ 1519, 1543},{ 1779, 1674},
+        { 2047, 1792},{ 2307, 1904},{ 2552, 2013},{ 2780, 2116},
+        { 2973, 2216},{ 3165, 2309},{ 3362, 2383},{ 3528, 2444},
+        { 3677, 2499},{ 3841, 2566},{ 3995, 2646},{ 4139, 2720},
+        { 4324, 2793},{ 4504, 2867},{ 4658, 2939},{ 4806, 2975}
+      },
+      /*Cr  qi=59  INTER*/
+      {
+        {   89,   -3},{   98,  352},{  103,  674},{  156, 1002},
+        {  268, 1300},{  441, 1562},{  673, 1801},{  980, 2010},
+        { 1385, 2175},{ 1868, 2301},{ 2401, 2402},{ 2984, 2474},
+        { 3591, 2520},{ 4179, 2545},{ 4729, 2555},{ 5232, 2553},
+        { 5679, 2545},{ 6081, 2530},{ 6447, 2510},{ 6791, 2496},
+        { 7101, 2487},{ 7393, 2489},{ 7684, 2499},{ 7950, 2501}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=60  INTRA*/
+      {
+        {   92,  116},{ 1361, 1085},{ 2746, 1686},{ 4050, 1895},
+        { 5209, 1939},{ 6244, 2012},{ 7213, 2103},{ 8105, 2197},
+        { 8928, 2290},{ 9685, 2381},{10371, 2460},{10952, 2511},
+        {11487, 2556},{12026, 2611},{12574, 2674},{13102, 2739},
+        {13597, 2793},{14092, 2831},{14523, 2862},{14862, 2881},
+        {15198, 2897},{15568, 2923},{15949, 2941},{16416, 2964}
+      },
+      /*Y'  qi=60  INTER*/
+      {
+        {    4,   30},{  215, 1287},{ 1547, 2104},{ 3729, 2491},
+        { 5973, 2568},{ 7672, 2577},{ 9001, 2591},{10123, 2606},
+        {11094, 2620},{11943, 2632},{12709, 2643},{13409, 2652},
+        {14044, 2660},{14641, 2669},{15193, 2677},{15709, 2684},
+        {16201, 2689},{16675, 2693},{17118, 2696},{17522, 2701},
+        {17920, 2704},{18293, 2706},{18620, 2702},{18923, 2700}
+      }
+    },
+    {
+      /*Cb  qi=60  INTRA*/
+      {
+        {   18,    3},{  227,  362},{  447,  694},{  708,  974},
+        {  981, 1207},{ 1252, 1397},{ 1532, 1547},{ 1822, 1663},
+        { 2082, 1780},{ 2316, 1903},{ 2548, 2013},{ 2794, 2101},
+        { 3029, 2178},{ 3242, 2266},{ 3445, 2360},{ 3638, 2459},
+        { 3816, 2547},{ 3980, 2628},{ 4146, 2708},{ 4344, 2766},
+        { 4546, 2812},{ 4725, 2872},{ 4880, 2930},{ 5054, 2966}
+      },
+      /*Cb  qi=60  INTER*/
+      {
+        {   97,   -4},{  112,  343},{  114,  664},{  162,  993},
+        {  273, 1294},{  472, 1553},{  774, 1762},{ 1138, 1939},
+        { 1543, 2102},{ 2034, 2236},{ 2620, 2329},{ 3244, 2389},
+        { 3860, 2423},{ 4443, 2440},{ 4997, 2449},{ 5502, 2455},
+        { 5962, 2458},{ 6413, 2466},{ 6836, 2485},{ 7217, 2506},
+        { 7592, 2518},{ 7957, 2533},{ 8291, 2543},{ 8574, 2545}
+      }
+    },
+    {
+      /*Cr  qi=60  INTRA*/
+      {
+        {   32,    8},{  243,  379},{  488,  702},{  771,  968},
+        { 1030, 1192},{ 1300, 1373},{ 1581, 1517},{ 1854, 1643},
+        { 2127, 1757},{ 2393, 1864},{ 2645, 1968},{ 2879, 2068},
+        { 3078, 2166},{ 3277, 2256},{ 3484, 2325},{ 3660, 2381},
+        { 3808, 2433},{ 3970, 2496},{ 4138, 2571},{ 4288, 2643},
+        { 4475, 2710},{ 4655, 2778},{ 4810, 2843},{ 4959, 2879}
+      },
+      /*Cr  qi=60  INTER*/
+      {
+        {   86,   -2},{   99,  352},{  103,  673},{  160,  998},
+        {  284, 1292},{  484, 1546},{  753, 1774},{ 1100, 1973},
+        { 1546, 2129},{ 2072, 2246},{ 2652, 2334},{ 3279, 2392},
+        { 3911, 2425},{ 4504, 2440},{ 5044, 2443},{ 5536, 2440},
+        { 5979, 2430},{ 6381, 2413},{ 6735, 2397},{ 7062, 2382},
+        { 7383, 2376},{ 7680, 2375},{ 7962, 2373},{ 8203, 2379}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=61  INTRA*/
+      {
+        {   54,  121},{ 1477, 1069},{ 3061, 1638},{ 4465, 1808},
+        { 5649, 1827},{ 6710, 1884},{ 7716, 1958},{ 8648, 2037},
+        { 9514, 2116},{10311, 2192},{11033, 2261},{11641, 2305},
+        {12202, 2342},{12771, 2387},{13356, 2440},{13924, 2493},
+        {14444, 2541},{14951, 2576},{15409, 2600},{15779, 2615},
+        {16131, 2626},{16521, 2648},{16921, 2663},{17409, 2694}
+      },
+      /*Y'  qi=61  INTER*/
+      {
+        {   -1,   32},{  216, 1286},{ 1806, 2036},{ 4279, 2327},
+        { 6629, 2352},{ 8347, 2352},{ 9707, 2357},{10860, 2364},
+        {11857, 2372},{12726, 2377},{13508, 2382},{14225, 2387},
+        {14877, 2392},{15484, 2398},{16048, 2401},{16581, 2405},
+        {17092, 2409},{17573, 2409},{18016, 2410},{18427, 2413},
+        {18829, 2415},{19221, 2415},{19578, 2415},{19980, 2413}
+      }
+    },
+    {
+      /*Cb  qi=61  INTRA*/
+      {
+        {   19,    3},{  231,  362},{  456,  693},{  733,  965},
+        { 1032, 1188},{ 1330, 1369},{ 1637, 1508},{ 1956, 1612},
+        { 2241, 1718},{ 2496, 1832},{ 2750, 1932},{ 3019, 2007},
+        { 3274, 2074},{ 3505, 2154},{ 3725, 2236},{ 3943, 2323},
+        { 4138, 2403},{ 4323, 2476},{ 4505, 2543},{ 4706, 2592},
+        { 4909, 2630},{ 5109, 2675},{ 5292, 2724},{ 5495, 2768}
+      },
+      /*Cb  qi=61  INTER*/
+      {
+        {   91,   -2},{  111,  344},{  114,  663},{  166,  989},
+        {  291, 1285},{  522, 1534},{  875, 1729},{ 1302, 1889},
+        { 1786, 2031},{ 2368, 2141},{ 3042, 2207},{ 3734, 2243},
+        { 4388, 2259},{ 4982, 2264},{ 5533, 2265},{ 6043, 2262},
+        { 6524, 2264},{ 6982, 2274},{ 7422, 2283},{ 7831, 2295},
+        { 8198, 2308},{ 8593, 2319},{ 8965, 2329},{ 9258, 2340}
+      }
+    },
+    {
+      /*Cr  qi=61  INTRA*/
+      {
+        {   33,    9},{  245,  378},{  497,  699},{  801,  958},
+        { 1087, 1171},{ 1384, 1342},{ 1692, 1474},{ 1992, 1589},
+        { 2290, 1692},{ 2576, 1789},{ 2852, 1884},{ 3109, 1973},
+        { 3324, 2061},{ 3544, 2142},{ 3763, 2199},{ 3945, 2244},
+        { 4103, 2292},{ 4283, 2349},{ 4469, 2413},{ 4635, 2476},
+        { 4836, 2534},{ 5038, 2592},{ 5210, 2649},{ 5358, 2682}
+      },
+      /*Cr  qi=61  INTER*/
+      {
+        {   82,    0},{   97,  353},{  104,  672},{  165,  995},
+        {  303, 1284},{  532, 1529},{  852, 1742},{ 1273, 1921},
+        { 1798, 2057},{ 2409, 2154},{ 3090, 2212},{ 3794, 2240},
+        { 4460, 2251},{ 5057, 2249},{ 5596, 2249},{ 6085, 2245},
+        { 6519, 2234},{ 6908, 2220},{ 7269, 2203},{ 7618, 2196},
+        { 7949, 2198},{ 8269, 2195},{ 8554, 2196},{ 8928, 2217}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=62  INTRA*/
+      {
+        {   29,  124},{ 1527, 1067},{ 3221, 1618},{ 4703, 1751},
+        { 5909, 1744},{ 7001, 1779},{ 8057, 1829},{ 9049, 1885},
+        { 9968, 1943},{10813, 1999},{11572, 2050},{12206, 2082},
+        {12801, 2107},{13402, 2140},{14020, 2180},{14625, 2223},
+        {15179, 2260},{15718, 2288},{16196, 2305},{16581, 2313},
+        {16963, 2324},{17382, 2341},{17800, 2351},{18318, 2376}
+      },
+      /*Y'  qi=62  INTER*/
+      {
+        {   -8,   36},{  218, 1284},{ 2073, 1965},{ 4814, 2159},
+        { 7237, 2138},{ 8979, 2124},{10378, 2115},{11570, 2109},
+        {12601, 2106},{13503, 2103},{14320, 2103},{15064, 2103},
+        {15746, 2103},{16384, 2104},{16975, 2105},{17534, 2105},
+        {18062, 2106},{18564, 2107},{19035, 2106},{19471, 2107},
+        {19890, 2107},{20288, 2107},{20651, 2107},{21012, 2108}
+      }
+    },
+    {
+      /*Cb  qi=62  INTRA*/
+      {
+        {   21,    3},{  283,  360},{  565,  683},{  907,  938},
+        { 1269, 1143},{ 1611, 1311},{ 1949, 1441},{ 2290, 1535},
+        { 2596, 1632},{ 2877, 1738},{ 3162, 1828},{ 3458, 1893},
+        { 3745, 1948},{ 4011, 2016},{ 4253, 2089},{ 4506, 2164},
+        { 4734, 2233},{ 4943, 2294},{ 5162, 2353},{ 5381, 2393},
+        { 5593, 2420},{ 5807, 2454},{ 6003, 2496},{ 6210, 2543}
+      },
+      /*Cb  qi=62  INTER*/
+      {
+        {   91,   -1},{  110,  344},{  113,  663},{  169,  987},
+        {  306, 1279},{  562, 1519},{  961, 1701},{ 1450, 1845},
+        { 2013, 1967},{ 2686, 2053},{ 3437, 2095},{ 4171, 2109},
+        { 4841, 2109},{ 5441, 2105},{ 6002, 2097},{ 6542, 2089},
+        { 7028, 2087},{ 7491, 2088},{ 7949, 2090},{ 8377, 2089},
+        { 8789, 2095},{ 9195, 2103},{ 9569, 2104},{ 9937, 2102}
+      }
+    },
+    {
+      /*Cr  qi=62  INTRA*/
+      {
+        {   38,    8},{  308,  374},{  619,  685},{  984,  925},
+        { 1326, 1126},{ 1662, 1285},{ 1999, 1407},{ 2328, 1512},
+        { 2659, 1604},{ 2976, 1691},{ 3285, 1774},{ 3570, 1853},
+        { 3815, 1931},{ 4068, 1998},{ 4304, 2044},{ 4491, 2082},
+        { 4666, 2124},{ 4870, 2174},{ 5078, 2231},{ 5262, 2285},
+        { 5480, 2335},{ 5703, 2378},{ 5905, 2423},{ 6075, 2454}
+      },
+      /*Cr  qi=62  INTER*/
+      {
+        {   79,    1},{   95,  353},{  102,  671},{  169,  992},
+        {  318, 1277},{  569, 1515},{  936, 1716},{ 1428, 1876},
+        { 2034, 1993},{ 2738, 2067},{ 3511, 2095},{ 4268, 2094},
+        { 4943, 2087},{ 5543, 2079},{ 6074, 2074},{ 6552, 2069},
+        { 6985, 2057},{ 7366, 2043},{ 7728, 2030},{ 8086, 2021},
+        { 8423, 2017},{ 8752, 2016},{ 9057, 2014},{ 9376, 2008}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=63  INTRA*/
+      {
+        {  -59,  134},{ 1734, 1036},{ 3743, 1521},{ 5309, 1618},
+        { 6520, 1597},{ 7664, 1609},{ 8809, 1630},{ 9894, 1657},
+        {10907, 1687},{11838, 1717},{12673, 1744},{13379, 1758},
+        {14038, 1767},{14698, 1784},{15379, 1806},{16062, 1831},
+        {16694, 1852},{17300, 1867},{17827, 1878},{18250, 1881},
+        {18702, 1884},{19199, 1892},{19665, 1896},{20273, 1908}
+      },
+      /*Y'  qi=63  INTER*/
+      {
+        {   -7,   33},{  209, 1285},{ 2309, 1904},{ 5274, 2025},
+        { 7801, 1966},{ 9637, 1924},{11126, 1892},{12403, 1868},
+        {13515, 1849},{14491, 1834},{15380, 1822},{16197, 1814},
+        {16944, 1806},{17645, 1799},{18303, 1794},{18916, 1789},
+        {19494, 1785},{20056, 1782},{20568, 1779},{21047, 1776},
+        {21508, 1775},{21925, 1772},{22327, 1770},{22678, 1771}
+      }
+    },
+    {
+      /*Cb  qi=63  INTRA*/
+      {
+        {   20,    3},{  294,  357},{  608,  673},{ 1047,  908},
+        { 1501, 1090},{ 1898, 1240},{ 2275, 1353},{ 2654, 1427},
+        { 3014, 1502},{ 3366, 1579},{ 3726, 1637},{ 4084, 1674},
+        { 4425, 1703},{ 4752, 1743},{ 5058, 1791},{ 5377, 1838},
+        { 5676, 1877},{ 5946, 1912},{ 6213, 1945},{ 6458, 1969},
+        { 6704, 1982},{ 6969, 1997},{ 7210, 2017},{ 7439, 2037}
+      },
+      /*Cb  qi=63  INTER*/
+      {
+        {   86,    1},{  108,  345},{  111,  663},{  168,  985},
+        {  307, 1276},{  577, 1513},{ 1007, 1688},{ 1550, 1819},
+        { 2189, 1921},{ 2938, 1981},{ 3744, 2002},{ 4512, 2002},
+        { 5199, 1996},{ 5824, 1986},{ 6419, 1971},{ 6978, 1954},
+        { 7507, 1940},{ 8015, 1932},{ 8502, 1928},{ 8978, 1920},
+        { 9410, 1915},{ 9842, 1910},{10262, 1901},{10634, 1896}
+      }
+    },
+    {
+      /*Cr  qi=63  INTRA*/
+      {
+        {   38,    7},{  324,  367},{  677,  670},{ 1136,  892},
+        { 1562, 1070},{ 1951, 1209},{ 2326, 1313},{ 2694, 1399},
+        { 3074, 1471},{ 3460, 1531},{ 3850, 1575},{ 4214, 1622},
+        { 4522, 1679},{ 4819, 1723},{ 5089, 1749},{ 5315, 1769},
+        { 5530, 1792},{ 5756, 1825},{ 6006, 1860},{ 6244, 1889},
+        { 6514, 1924},{ 6792, 1946},{ 7026, 1962},{ 7191, 1971}
+      },
+      /*Cr  qi=63  INTER*/
+      {
+        {   80,    2},{   95,  354},{  101,  671},{  167,  990},
+        {  321, 1274},{  585, 1509},{  984, 1702},{ 1534, 1849},
+        { 2217, 1947},{ 3005, 1995},{ 3839, 1999},{ 4619, 1986},
+        { 5310, 1973},{ 5933, 1961},{ 6486, 1952},{ 6988, 1942},
+        { 7435, 1927},{ 7817, 1911},{ 8198, 1900},{ 8552, 1895},
+        { 8881, 1890},{ 9253, 1883},{ 9598, 1876},{ 9923, 1859}
+      }
+    }
+  }
+};
+
+#endif

Copied: trunk/theora/lib/ocintrin.h (from rev 16442, trunk/theora/lib/dec/ocintrin.h)
===================================================================
--- trunk/theora/lib/ocintrin.h	                        (rev 0)
+++ trunk/theora/lib/ocintrin.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,128 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+/*Some common macros for potential platform-specific optimization.*/
+#include <math.h>
+#if !defined(_ocintrin_H)
+# define _ocintrin_H (1)
+
+/*Some specific platforms may have optimized intrinsic or inline assembly
+   versions of these functions which can substantially improve performance.
+  We define macros for them to allow easy incorporation of these non-ANSI
+   features.*/
+
+/*Note that we do not provide a macro for abs(), because it is provided as a
+   library function, which we assume is translated into an intrinsic to avoid
+   the function call overhead and then implemented in the smartest way for the
+   target platform.
+  With modern gcc (4.x), this is true: it uses cmov instructions if the
+   architecture supports it and branchless bit-twiddling if it does not (the
+   speed difference between the two approaches is not measurable).
+  Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150)
+   by Sun Microsystems, despite prior art dating back to at least 1996:
+   http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT
+  On gcc 3.x, however, our assumption is not true, as abs() is translated to a
+   conditional jump, which is horrible on deeply pipelined architectures (e.g.,
+   all consumer architectures for the past decade or more).
+  Also be warned that -C*abs(x) where C is a constant is mis-optimized as
+   abs(C*x) on every gcc release before 4.2.3.
+  See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */
+
+/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
+   given an appropriate architecture, but the branchless bit-twiddling versions
+   are just as fast, and do not require any special target architecture.
+  Earlier gcc versions (3.x) compiled both versions to the same assembly
+   instructions, because of the way they represented ((_b)>(_a)) internally.
+  The comparison yields 0 or 1; negating it gives a mask of all zero bits or
+   all one bits, which selects whether the difference is applied to _a.
+  Both macros expand their arguments more than once, so the arguments must
+   not have side effects.*/
+#define OC_MAXI(_a,_b)      ((_a)-((_a)-(_b)&-((_b)>(_a))))
+#define OC_MINI(_a,_b)      ((_a)+((_b)-(_a)&-((_b)<(_a))))
+/*Clamps an integer into the given range.
+  If _a>_c, then the lower bound _a is respected over the upper bound _c (this
+   behavior is required to meet our documented API behavior).
+  _a: The lower bound.
+  _b: The value to clamp.
+  _c: The upper bound.*/
+#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
+/*Clamps an integer to the range [0,255] and returns it as an unsigned char.
+  (((_x)<0)-1) is 0 for negative _x and all one bits otherwise, which forces
+   negative inputs to 0.
+  ((_x)|-((_x)>255)) sets every bit when _x exceeds 255, so the cast to
+   unsigned char yields 255.*/
+#define OC_CLAMP255(_x)     ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255))))
+/*This has a chance of compiling branchless, and is just as fast as the
+   bit-twiddling method, which is slightly less portable, since it relies on a
+   sign-extended rightshift, which is not guaranteed by ANSI (but present on
+   every relevant platform).
+  Evaluates to 1 for positive _a, -1 for negative _a, and 0 for zero.*/
+#define OC_SIGNI(_a)        (((_a)>0)-((_a)<0))
+/*Slightly more portable than relying on a sign-extended right-shift (which is
+   not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both)
+   compile it into the right-shift anyway.
+  Evaluates to -1 for negative _a, and to 0 otherwise.*/
+#define OC_SIGNMASK(_a)     (-((_a)<0))
+/*Divides an integer by a power of two, truncating towards 0.
+  Negative dividends have _rmask added before the shift; non-negative ones are
+   shifted unchanged.
+  Note that the addition binds more tightly than the shift.
+  _dividend: The integer to divide.
+  _shift:    The non-negative power of two to divide by.
+  _rmask:    (1<<_shift)-1*/
+#define OC_DIV_POW2(_dividend,_shift,_rmask)\
+  ((_dividend)+(OC_SIGNMASK(_dividend)&(_rmask))>>(_shift))
+/*Divides _x by 65536, truncating towards 0.*/
+#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF)
+/*Divides _x by 2, truncating towards 0.*/
+#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1)
+/*Divides _x by 8, truncating towards 0.*/
+#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7)
+/*Divides _x by 16, truncating towards 0.*/
+#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF)
+/*Right shifts _dividend by _shift, adding _rval, and subtracting one for
+   negative dividends first.
+  When _rval is (1<<(_shift-1)), this is equivalent to division with rounding
+   ties away from zero.*/
+#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\
+  ((_dividend)+OC_SIGNMASK(_dividend)+(_rval)>>(_shift))
+/*Divides a _x by 2, rounding towards even numbers.
+  Bit 1 of _x (the low bit of the truncated quotient) is added before the
+   shift.*/
+#define OC_DIV2_RE(_x) ((_x)+((_x)>>1&1)>>1)
+/*Divides a _x by (1<<(_shift)), rounding towards even numbers.*/
+#define OC_DIV_POW2_RE(_x,_shift) \
+  ((_x)+((_x)>>(_shift)&1)+((1<<(_shift))-1>>1)>>(_shift))
+/*Swaps two integers _a and _b if _a>_b.
+  Both arguments must be modifiable lvalues, and each is expanded more than
+   once.
+  The exchange is an XOR swap through an int temporary; the temporary is 0
+   (so nothing changes) unless _b<_a.*/
+#define OC_SORT2I(_a,_b) \
+  do{ \
+    int t__; \
+    t__=((_a)^(_b))&-((_b)<(_a)); \
+    (_a)^=t__; \
+    (_b)^=t__; \
+  } \
+  while(0)
+
+/*Accesses one of four (signed) bytes given an index.
+  This can be used to avoid small lookup tables.
+  The four values are packed into a single integer with _a in the lowest
+   byte; _i (0 for _a up to 3 for _d) selects the byte that is shifted down
+   and returned as a signed char.*/
+#define OC_BYTE_TABLE32(_a,_b,_c,_d,_i) \
+  ((signed char) \
+   (((_a)&0xFF|((_b)&0xFF)<<8|((_c)&0xFF)<<16|((_d)&0xFF)<<24)>>(_i)*8))
+/*Accesses one of eight (unsigned) nibbles given an index.
+  This can be used to avoid small lookup tables.
+  The eight values are packed four bits apiece with _a in the lowest nibble;
+   _i (0 for _a up to 7 for _h) selects the nibble, which is masked to the
+   range [0,15].*/
+#define OC_UNIBBLE_TABLE32(_a,_b,_c,_d,_e,_f,_g,_h,_i) \
+  ((((_a)&0xF|((_b)&0xF)<<4|((_c)&0xF)<<8|((_d)&0xF)<<12| \
+   ((_e)&0xF)<<16|((_f)&0xF)<<20|((_g)&0xF)<<24|((_h)&0xF)<<28)>>(_i)*4)&0xF)
+
+
+
+/*All of these macros should expect floats as arguments.
+  The math wrappers call the double-precision <math.h> functions and cast the
+   result back to float (or to int for the floor and ceil variants).*/
+/*OC_MAXF and OC_MINF expand the selected argument twice, so the arguments
+   must not have side effects.*/
+#define OC_MAXF(_a,_b)      ((_a)<(_b)?(_b):(_a))
+#define OC_MINF(_a,_b)      ((_a)>(_b)?(_b):(_a))
+/*NOTE(review): as written this expands to min(_a,max(_b,_c)), i.e., it
+   clamps _b to the range [_c,_a], with the upper bound given first.
+  That is the reverse of the argument order documented for OC_CLAMPI; confirm
+   against the callers before relying on either order.*/
+#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c)))
+#define OC_FABSF(_f)        ((float)fabs(_f))
+#define OC_SQRTF(_f)        ((float)sqrt(_f))
+#define OC_POWF(_b,_e)      ((float)pow(_b,_e))
+#define OC_LOGF(_f)         ((float)log(_f))
+#define OC_IFLOORF(_f)      ((int)floor(_f))
+#define OC_ICEILF(_f)       ((int)ceil(_f))
+
+#endif

Copied: trunk/theora/lib/quant.c (from rev 16442, trunk/theora/lib/dec/quant.c)
===================================================================
--- trunk/theora/lib/quant.c	                        (rev 0)
+++ trunk/theora/lib/quant.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,119 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "quant.h"
+#include "decint.h"
+
+/*Lower bounds for the scaled DC and AC quantizers, indexed by qti.
+  The <<2 matches the scaling applied to the quantizers computed below.*/
+static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
+static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
+
+/*Initializes the dequantization tables from a set of quantizer info.
+  Currently the dequantizer (and elsewhere enquantizer) tables are expected to
+   be initialized as pointing to the storage reserved for them in the
+   oc_theora_state (resp. oc_enc_ctx) structure.
+  If some tables are duplicates of others, the pointers will be adjusted to
+   point to a single copy of the tables, but the storage for them will not be
+   freed.
+  If you're concerned about the memory footprint, the obvious thing to do is
+   to move the storage out of its fixed place in the structures and allocate
+   it on demand.
+  However, a much, much better option is to only store the quantization
+   matrices being used for the current frame, and to recalculate these as the
+   qi values change between frames (this is what VP3 did).
+  _dequant:     Table pointers indexed by [qi][pli][qti].
+                Each must point at writable storage for 64 entries on entry,
+                 and may be redirected to an identical earlier table.
+  _pp_dc_scale: If non-NULL, receives a per-qi DC scale for postprocessing.
+                It is rewritten on every (qti,pli) pass, so the values left
+                 behind are those of the last pass.
+  _qinfo:       The quantizer info to build the tables from.*/
+void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
+ int _pp_dc_scale[64],const th_quant_info *_qinfo){
+  /*Coding mode: intra or inter.*/
+  int          qti;
+  /*Y', C_b, C_r*/
+  int          pli;
+  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+    /*Quality index.*/
+    int qi;
+    /*Range iterator.*/
+    int qri;
+    /*The final pass (qri==nranges) handles the single qi at the last base
+       matrix.*/
+    for(qi=0,qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){
+      th_quant_base base;
+      ogg_uint32_t  q;
+      int           qi_start;
+      int           qi_end;
+      /*Start each range from its first base matrix, unmodified.*/
+      memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
+       sizeof(base));
+      qi_start=qi;
+      if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
+      else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
+      /*Iterate over quality indices in this range.*/
+      for(;;){
+        ogg_uint32_t qfac;
+        int          zzi;
+        int          ci;
+        /*In the original VP3.2 code, the rounding offset and the size of the
+           dead zone around 0 were controlled by a "sharpness" parameter.
+          The size of our dead zone is now controlled by the per-coefficient
+           quality thresholds returned by our HVS module.
+          We round down from a more accurate value when the quality of the
+           reconstruction does not fall below our threshold and it saves bits.
+          Hence, all of that VP3.2 code is gone from here, and the remaining
+           floating point code has been implemented as equivalent integer code
+           with exact precision.*/
+        qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
+        /*For postprocessing, not dequantization.*/
+        if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
+        /*Scale the DC coefficient from the proper table.*/
+        q=(qfac/100)<<2;
+        q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
+        _dequant[qi][pli][qti][0]=(ogg_uint16_t)q;
+        /*Now scale AC coefficients from the proper table.
+          The base matrix is read through OC_FZIG_ZAG, while the output is
+           stored at index zzi.*/
+        for(zzi=1;zzi<64;zzi++){
+          q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[OC_FZIG_ZAG[zzi]]/100)<<2;
+          q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
+          _dequant[qi][pli][qti][zzi]=(ogg_uint16_t)q;
+        }
+        /*If this is a duplicate of a previous matrix, use that instead.
+          This simple check helps us improve cache coherency later.
+          Only tables for the same qi are compared: every plane of an earlier
+           qti, and the earlier planes of the current qti.*/
+        {
+          int dupe;
+          int qtj;
+          int plj;
+          dupe=0;
+          for(qtj=0;qtj<=qti;qtj++){
+            for(plj=0;plj<(qtj<qti?3:pli);plj++){
+              if(!memcmp(_dequant[qi][pli][qti],_dequant[qi][plj][qtj],
+               sizeof(oc_quant_table))){
+                dupe=1;
+                break;
+              }
+            }
+            if(dupe)break;
+          }
+          /*Both breaks leave plj and qtj at the indices of the match.*/
+          if(dupe)_dequant[qi][pli][qti]=_dequant[qi][plj][qtj];
+        }
+        if(++qi>=qi_end)break;
+        /*Interpolate the next base matrix.
+          This is a linear blend of base matrices qri and qri+1, weighted by
+           the position of qi within the range; adding sizes[qri] before
+           dividing by 2*sizes[qri] rounds to nearest.*/
+        for(ci=0;ci<64;ci++){
+          base[ci]=(unsigned char)(
+           (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
+           (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
+           +_qinfo->qi_ranges[qti][pli].sizes[qri])/
+           (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
+        }
+      }
+    }
+  }
+}

Copied: trunk/theora/lib/quant.h (from rev 16442, trunk/theora/lib/dec/quant.h)
===================================================================
--- trunk/theora/lib/quant.h	                        (rev 0)
+++ trunk/theora/lib/quant.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,33 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#if !defined(_quant_H)
+# define _quant_H (1)
+# include "theora/codec.h"
+# include "ocintrin.h"
+
+/*A single quantization table: 64 16-bit entries.*/
+typedef ogg_uint16_t   oc_quant_table[64];
+
+
+/*Maximum scaled quantizer value.*/
+#define OC_QUANT_MAX          (1024<<2)
+
+
+/*Fills in the dequantization tables (and, if _pp_dc_scale is non-NULL, the
+   postprocessing DC scales) from _qinfo.
+  See quant.c for details.*/
+void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
+ int _pp_dc_scale[64],const th_quant_info *_qinfo);
+
+#endif

Copied: trunk/theora/lib/rate.c (from rev 16442, trunk/theora/lib/enc/rate.c)
===================================================================
--- trunk/theora/lib/rate.c	                        (rev 0)
+++ trunk/theora/lib/rate.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,1113 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+/*A rough lookup table for tan(x), 0<=x<pi/2.
+  The values are Q12 fixed-point and spaced at 5 degree intervals.
+  These decisions are somewhat arbitrary, but sufficient for the 2nd order
+   Bessel follower below.
+  Values of x larger than 85 degrees are extrapolated from the last interval,
+   which is way off, but "good enough".
+  The table is only ever read, so it is declared const.*/
+static const unsigned short OC_ROUGH_TAN_LOOKUP[18]={
+      0,  358,  722, 1098, 1491, 1910,
+   2365, 2868, 3437, 4096, 4881, 5850,
+   7094, 8784,11254,15286,23230,46817
+};
+
+/*Approximates tan(pi*_alpha) by linear interpolation in OC_ROUGH_TAN_LOOKUP.
+  _alpha: A Q24 value in the range [0,0.5).
+  Return: The interpolated tangent, in 5.12 fixed-point.*/
+static int oc_warp_alpha(int _alpha){
+  int i;
+  int d;
+  int t0;
+  int t1;
+  /*36 table steps of 5 degrees span half a turn, so _alpha*36 is the table
+     position in Q24; its integer part is the index of the interval.*/
+  i=_alpha*36>>24;
+  /*There is no entry past index 17, so the last interval is reused (and
+     extrapolated) for anything beyond it.*/
+  if(i>=17)i=16;
+  t0=OC_ROUGH_TAN_LOOKUP[i];
+  t1=OC_ROUGH_TAN_LOOKUP[i+1];
+  /*The Q24 offset into the interval; this exceeds 1.0 when extrapolating.*/
+  d=_alpha*36-(i<<24);
+  /*t0+(t1-t0)*d, evaluated in Q32 and then shifted back down.
+    The addition binds more tightly than the final shift.*/
+  return (int)(((ogg_int64_t)t0<<32)+(t1-t0<<8)*(ogg_int64_t)d>>32);
+}
+
+/*Initialize a 2nd order low-pass Bessel filter with the corresponding delay
+   and initial value.
+  _f:     The filter to initialize; its coefficients (c[0], c[1], g) and both
+           input and output history taps are overwritten.
+  _delay: The filter delay.
+          This is used as a divisor, so it must be non-zero.
+  _value: The initial value, in Q24, copied into every history tap.*/
+void oc_iir_filter_init(oc_iir_filter *_f,int _delay,ogg_int32_t _value){
+  int         alpha;
+  ogg_int64_t one48;
+  ogg_int64_t warp;
+  ogg_int64_t k1;
+  ogg_int64_t k2;
+  ogg_int64_t d;
+  ogg_int64_t a;
+  ogg_int64_t ik2;
+  ogg_int64_t b1;
+  ogg_int64_t b2;
+  /*This borrows some code from an unreleased version of Postfish.
+    See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
+     on deriving the filter coefficients.*/
+  /*alpha is Q24*/
+  alpha=(1<<24)/_delay;
+  one48=(ogg_int64_t)1<<48;
+  /*warp is 7.12
+    It is kept at 1 or more so that k2 below is never zero (it is used as a
+     divisor).*/
+  warp=OC_MAXI(oc_warp_alpha(alpha),1);
+  /*k1 is 9.12*/
+  k1=3*warp;
+  /*k2 is 16.24.*/
+  k2=k1*warp;
+  /*d is 16.15.*/
+  d=((1<<12)+k1<<12)+k2+256>>9;
+  /*a is 0.32, since d is larger than both 1.0 and k2.*/
+  a=(k2<<23)/d;
+  /*ik2 is 25.24.*/
+  ik2=one48/k2;
+  /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/
+  b1=2*a*(ik2-(1<<24));
+  /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/
+  b2=(one48<<8)-(4*a<<24)-b1;
+  /*All of the filter parameters are Q24.
+    The Q56 and Q32 intermediates are rounded to nearest when shifted down.*/
+  _f->c[0]=(ogg_int32_t)(b1+((ogg_int64_t)1<<31)>>32);
+  _f->c[1]=(ogg_int32_t)(b2+((ogg_int64_t)1<<31)>>32);
+  _f->g=(ogg_int32_t)(a+128>>8);
+  _f->y[1]=_f->y[0]=_f->x[1]=_f->x[0]=_value;
+}
+
+/*Feeds one new input sample through the filter and advances its history.
+  _f: The filter state; both history taps of x[] and y[] are updated.
+  _x: The new input sample.
+  Return: The new filter output, which is also stored (truncated to 32 bits)
+   in _f->y[0].*/
+static ogg_int64_t oc_iir_filter_update(oc_iir_filter *_f,int _x){
+  ogg_int64_t prev_in;
+  ogg_int64_t prev_out;
+  ogg_int64_t acc;
+  prev_in=_f->x[0];
+  prev_out=_f->y[0];
+  /*Feed-forward term: the new and the two previous inputs, weighted 1:2:1
+     and scaled by the gain.*/
+  acc=(_x+prev_in*2+_f->x[1])*(ogg_int64_t)_f->g;
+  /*Feedback terms from the two previous outputs.*/
+  acc+=prev_out*_f->c[0];
+  acc+=(ogg_int64_t)_f->y[1]*_f->c[1];
+  /*Round to nearest while dropping the 24 fractional bits of the
+     coefficients.*/
+  acc+=1<<23;
+  acc>>=24;
+  /*Shift the history down by one sample.*/
+  _f->x[1]=(ogg_int32_t)prev_in;
+  _f->x[0]=_x;
+  _f->y[1]=(ogg_int32_t)prev_out;
+  _f->y[0]=(ogg_int32_t)acc;
+  return acc;
+}
+
+
+
+/*Finds the qi in [_qi_min,63] whose log_qavg is nearest to _log_qtarget.
+  No ordering of log_qavg is assumed, so every candidate is examined.
+  When two candidates are equally near, the one closer to _qi_old wins.
+  _enc:         The encoder context holding the log_qavg table.
+  _qti:         The quantizer type index into log_qavg.
+  _qi_old:      The previous qi, used only to break ties.
+  _qi_min:      The smallest qi to consider.
+  _log_qtarget: The target value to match.
+  Return: The selected qi.*/
+static int oc_enc_find_qi_for_target(oc_enc_ctx *_enc,int _qti,int _qi_old,
+ int _qi_min,ogg_int64_t _log_qtarget){
+  ogg_int64_t min_err;
+  int         winner;
+  int         cand;
+  /*Seed the search with the first candidate.*/
+  winner=_qi_min;
+  min_err=_enc->log_qavg[_qti][winner]-_log_qtarget;
+  if(min_err<0)min_err=-min_err;
+  cand=_qi_min;
+  while(++cand<64){
+    ogg_int64_t err;
+    err=_enc->log_qavg[_qti][cand]-_log_qtarget;
+    if(err<0)err=-err;
+    /*Strictly worse: keep the current winner.*/
+    if(err>min_err)continue;
+    /*A tie only displaces the winner if it is nearer to the old qi.*/
+    if(err==min_err&&abs(cand-_qi_old)>=abs(winner-_qi_old))continue;
+    winner=cand;
+    min_err=err;
+  }
+  return winner;
+}
+
+/*Sets the R-D multiplier and the list of candidate quantizers for a frame.
+  For now, lambda is fixed depending on the qi value and frame type:
+    lambda=qscale*(qavg[qti][qi]**2),
+   where qscale=0.2125.
+  This was derived by exhaustively searching for the optimal quantizer for
+   the AC coefficients in each block from a number of test sequences for a
+   number of fixed lambda values and fitting the peaks of the resulting
+   histograms (on the log(qavg) scale).
+  The same model applies to both inter and intra frames.
+  A more adaptive scheme might perform better.
+  _enc: The encoder context; lambda, state.qis[] and state.nqis are written.
+  _qti: The quantizer type index used to look up log_qavg.*/
+void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _qti){
+  ogg_int64_t log_q;
+  int         base_qi;
+  int         alt_qi;
+  int         count;
+  base_qi=_enc->state.qis[0];
+  /*Without rate control, lambda follows the quantizer actually in use.
+    With rate control active, it follows the _target_ quantizer instead.
+    This allows us to scale to rates slightly lower than we'd normally be able
+     to reach, and gives the rate control a semblance of "fractional qi"
+     precision.
+    TODO: Add API for changing QI, and allow extra precision.*/
+  if(_enc->state.info.target_bitrate<=0)log_q=_enc->log_qavg[_qti][base_qi];
+  else log_q=_enc->rc.log_qtarget;
+  /*The resulting lambda value is less than 0x500000.*/
+  _enc->lambda=(int)oc_bexp64(2*log_q-0x4780BD468D6B62BLL);
+  /*Select additional quantizers.
+    The R-D optimal block AC quantizer statistics suggest that the distribution
+     is roughly Gaussian-like with a slight positive skew.
+    K-means clustering on log_qavg to select 3 quantizers produces cluster
+     centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}.
+    Experiments confirm these are relatively good choices.
+
+    Although we do greedy R-D optimization of the qii flags to avoid switching
+     too frequently, this becomes ineffective at low rates, either because we
+     do a poor job of predicting the actual R-D cost, or the greedy
+     optimization is not sufficient.
+    Therefore adaptive quantization is disabled above an (experimentally
+     suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or
+     INTER qi=20 with current matrices).
+    This may need to be revised if the R-D cost estimation or qii flag
+     optimization strategies change.*/
+  count=1;
+  if(!_enc->vp3_compatible&&log_q<(OC_Q57(56)>>3)){
+    /*The coarser alternative, 0.7 above the base on the log scale.*/
+    alt_qi=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(base_qi-1,0),0,
+     log_q+(OC_Q57(7)+5)/10);
+    if(alt_qi!=base_qi){
+      _enc->state.qis[count]=alt_qi;
+      count++;
+    }
+    /*The finer alternative, 0.6 below the base on the log scale.
+      It is skipped if it repeats the entry added just before it.*/
+    alt_qi=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(base_qi+1,63),0,
+     log_q-(OC_Q57(6)+5)/10);
+    if(alt_qi!=base_qi&&alt_qi!=_enc->state.qis[count-1]){
+      _enc->state.qis[count]=alt_qi;
+      count++;
+    }
+  }
+  _enc->state.nqis=count;
+}
+
+/*Computes 2**_log_scale as a Q24 value, saturating instead of overflowing.
+  _log_scale: A binary logarithm in Q57 format.
+  Return: The binary exponential in Q24 format, or 2**31-1 if the result
+   would not fit below that limit.*/
+static ogg_int32_t oc_bexp_q24(ogg_int64_t _log_scale){
+  ogg_int64_t scaled;
+  /*Exponents of 8 or more are saturated without being evaluated.*/
+  if(_log_scale>=OC_Q57(8))return 0x7FFFFFFF;
+  /*Adding 24 to the exponent yields the result directly in Q24.*/
+  scaled=oc_bexp64(_log_scale+OC_Q57(24));
+  if(scaled>=0x7FFFFFFF)return 0x7FFFFFFF;
+  return (ogg_int32_t)scaled;
+}
+
+
+
+/*Resets the rate control state of _enc from its current configuration.
+  This recomputes the per-frame bit budget and buffer bounds, sets the buffer
+   fullness to its initial target, picks the initial quantizer-selection
+   model from the frame size and bit budget, and re-initializes the scale and
+   drop-rate followers.*/
+static void oc_enc_rc_reset(oc_enc_ctx *_enc){
+  ogg_int64_t frame_area;
+  ogg_int64_t pixels_per_bit;
+  int         follower_delay;
+  /*TODO: These parameters should be exposed in a th_encode_ctl() API.*/
+  _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
+   (ogg_int64_t)_enc->state.info.fps_denominator)/
+   _enc->state.info.fps_numerator;
+  /*Keep the per-frame budget within sane limits: insane framerates or frame
+     sizes would otherwise mean insane bitrates.*/
+  if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
+  else if(_enc->rc.bits_per_frame>0x400000000000LL){
+    _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
+  }
+  /*The buffer always spans at least 12 frames.*/
+  _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
+  _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
+  /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend
+     on a single keyframe interval.
+    We can require fully half the bits in an interval for a keyframe, so this
+     initial level gives us maximum flexibility for over/under-shooting in
+     subsequent frames.*/
+  _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
+   OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
+  _enc->rc.fullness=_enc->rc.target;
+  /*Pick exponents and initial scales for quantizer selection, based on the
+     number of pixels that must be coded per bit.*/
+  frame_area=_enc->state.info.frame_width*
+   (ogg_int64_t)_enc->state.info.frame_height;
+  _enc->rc.log_npixels=oc_blog64(frame_area);
+  pixels_per_bit=frame_area/_enc->rc.bits_per_frame;
+  /*Model for index 0.*/
+  if(pixels_per_bit>=2){
+    _enc->rc.exp[0]=48;
+    _enc->rc.log_scale[0]=oc_blog64(834)-OC_Q57(8);
+  }
+  else if(pixels_per_bit>=1){
+    _enc->rc.exp[0]=55;
+    _enc->rc.log_scale[0]=oc_blog64(1604)-OC_Q57(8);
+  }
+  else{
+    _enc->rc.exp[0]=59;
+    _enc->rc.log_scale[0]=oc_blog64(1997)-OC_Q57(8);
+  }
+  /*Model for index 1.*/
+  if(pixels_per_bit>=8){
+    _enc->rc.exp[1]=73;
+    _enc->rc.log_scale[1]=oc_blog64(1260)-OC_Q57(8);
+  }
+  else if(pixels_per_bit>=4){
+    _enc->rc.exp[1]=95;
+    _enc->rc.log_scale[1]=oc_blog64(1751)-OC_Q57(8);
+  }
+  else{
+    _enc->rc.exp[1]=100;
+    _enc->rc.log_scale[1]=oc_blog64(2249)-OC_Q57(8);
+  }
+  _enc->rc.prev_drop_count=0;
+  _enc->rc.log_drop_scale=OC_Q57(0);
+  /*Set up second order followers, initialized according to corresponding
+     time constants.*/
+  oc_iir_filter_init(&_enc->rc.scalefilter[0],2,
+   oc_bexp_q24(_enc->rc.log_scale[0]));
+  /*In two-pass mode the second follower's delay comes from the keyframe
+     interval (at least 12) rather than from the buffer delay.*/
+  follower_delay=!_enc->rc.twopass?
+   _enc->rc.buf_delay:OC_MAXI(_enc->keyframe_frequency_force,12);
+  oc_iir_filter_init(&_enc->rc.scalefilter[1],follower_delay>>1,
+   oc_bexp_q24(_enc->rc.log_scale[1]));
+  oc_iir_filter_init(&_enc->rc.vfrfilter,2,
+   oc_bexp_q24(_enc->rc.log_drop_scale));
+}
+
+/*Puts the rate control state into its initial configuration.
+  The two-pass fields are always cleared.
+  The remaining fields are only set up (and the controller reset) when _enc
+   has a positive target bitrate.
+  _rc:  The rate control state to initialize.
+  _enc: The encoder context supplying the configuration.*/
+void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc){
+  _rc->twopass=0;
+  _rc->twopass_buffer_bytes=0;
+  _rc->twopass_force_kf=0;
+  _rc->frame_metrics=NULL;
+  /*Nothing more to do unless a target bitrate was requested.*/
+  if(_enc->state.info.target_bitrate<=0)return;
+  /*The buffer size is set equal to the keyframe interval, clamped to the
+     range [12,256] frames.
+    The 12 frame minimum gives us some chance to distribute bit estimation
+     errors.
+    The 256 frame maximum means we'll require 8-10 seconds of pre-buffering
+     at 24-30 fps, which is not unreasonable.
+    Only the upper limit is applied here; oc_enc_rc_reset() applies the
+     lower one.*/
+  _rc->buf_delay=_enc->keyframe_frequency_force>256?
+   256:_enc->keyframe_frequency_force;
+  /*By default, enforce all buffer constraints.*/
+  _rc->drop_frames=1;
+  _rc->cap_overflow=1;
+  _rc->cap_underflow=0;
+  oc_enc_rc_reset(_enc);
+}
+
+/*Releases the storage owned by the rate control state.
+  Only the frame metrics buffer is freed; _rc itself is not.*/
+void oc_rc_state_clear(oc_rc_state *_rc){
+  oc_frame_metrics *metrics;
+  metrics=_rc->frame_metrics;
+  _ogg_free(metrics);
+}
+
+/*Recomputes the rate control buffer bounds from the current target bitrate,
+   frame rate and buffer delay.
+  If encoding has not started, the whole rate control state is reset.
+  Otherwise only the bounds, the target level and the second scale follower
+   are updated, and the current fullness is left alone.
+  In pass-2 mode this also grows the circular frame metrics buffer to cover
+   the new buffer delay, and switches from whole-file buffering to a finite
+   window when needed.
+  If the buffer cannot be grown, the previous buffer delay is restored and
+   the bounds are recomputed.*/
+void oc_enc_rc_resize(oc_enc_ctx *_enc){
+  /*If encoding has not yet begun, reset the buffer state.*/
+  if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc);
+  else{
+    /*Otherwise, update the bounds on the buffer, but not the current
+       fullness.*/
+    _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
+     (ogg_int64_t)_enc->state.info.fps_denominator)/
+     _enc->state.info.fps_numerator;
+    /*Insane framerates or frame sizes mean insane bitrates.
+      Let's not get carried away.*/
+    if(_enc->rc.bits_per_frame>0x400000000000LL){
+      _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
+    }
+    else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
+    _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
+    _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
+    _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
+     OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
+    oc_iir_filter_init(&_enc->rc.scalefilter[1],_enc->rc.buf_delay>>1,
+     oc_bexp_q24(_enc->rc.log_scale[1]));
+  }
+  /*If we're in pass-2 mode, make sure the frame metrics array is big enough
+     to hold frame statistics for the full buffer.*/
+  if(_enc->rc.twopass==2){
+    int cfm;
+    int buf_delay;
+    int reset_window;
+    buf_delay=_enc->rc.buf_delay;
+    /*We need to start a finite window if no metrics buffer exists yet and
+       either no 2-pass header has been read, or the new buffer is shorter
+       than the whole file.*/
+    reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0||
+     buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
+     +_enc->rc.frames_total[2]);
+    cfm=_enc->rc.cframe_metrics;
+    /*Only try to resize the frame metrics buffer if a) it's too small and
+       b) we were using a finite buffer, or are about to start.*/
+    if(cfm<buf_delay&&(_enc->rc.frame_metrics!=NULL||reset_window)){
+      oc_frame_metrics *fm;
+      int               nfm;
+      int               fmh;
+      fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics,
+       buf_delay*sizeof(*_enc->rc.frame_metrics));
+      if(fm==NULL){
+        /*We failed to allocate a finite buffer.*/
+        /*If we don't have a valid 2-pass header yet, just return; we'll reset
+           the buffer size when we read the header.*/
+        if(_enc->rc.frames_total[0]==0)return;
+        /*Otherwise revert to the largest finite buffer previously set, or to
+           whole-file buffering if we were still using that.*/
+        _enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL?
+         cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
+         +_enc->rc.frames_total[2];
+        oc_enc_rc_resize(_enc);
+        return;
+      }
+      _enc->rc.frame_metrics=fm;
+      _enc->rc.cframe_metrics=buf_delay;
+      /*Re-organize the circular buffer.*/
+      fmh=_enc->rc.frame_metrics_head;
+      nfm=_enc->rc.nframe_metrics;
+      if(fmh+nfm>cfm){
+        int shift;
+        /*The number of wrapped entries that fit in the newly added space at
+           the end of the buffer.*/
+        shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm);
+        memcpy(fm+cfm,fm,shift*sizeof(*fm));
+        /*Any wrapped entries that still do not fit slide down to the front.
+          The count is in entries, so it must be scaled to bytes; passing it
+           unscaled would move only part of the data.*/
+        if(fmh+nfm>buf_delay){
+          memmove(fm,fm+shift,(fmh+nfm-buf_delay)*sizeof(*fm));
+        }
+      }
+    }
+    /*We were using whole-file buffering; now we're not.*/
+    if(reset_window){
+      _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0;
+      _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0;
+      _enc->rc.scale_window_end=_enc->rc.scale_window0=
+       _enc->state.curframe_num+_enc->prev_dup_count+1;
+      if(_enc->rc.twopass_buffer_bytes){
+        int qti;
+        /*We already read the metrics for the first frame in the window.*/
+        *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics;
+        _enc->rc.nframe_metrics++;
+        qti=_enc->rc.cur_metrics.frame_type;
+        _enc->rc.nframes[qti]++;
+        _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count;
+        _enc->rc.scale_sum[qti]+=_enc->rc.cur_metrics.scale;
+        _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1;
+        if(_enc->rc.scale_window_end-_enc->rc.scale_window0<buf_delay){
+          /*We need more frame data.*/
+          _enc->rc.twopass_buffer_bytes=0;
+        }
+      }
+    }
+    /*Otherwise, we could shrink the size of the current window, if necessary,
+       but leaving it like it is lets us adapt to the new buffer size more
+       gracefully.*/
+  }
+}
+
+/*Scale the number of frames by the number of expected drops/duplicates.*/
+static int oc_rc_scale_drop(oc_rc_state *_rc,int _nframes){
+  if(_rc->prev_drop_count>0||_rc->log_drop_scale>OC_Q57(0)){
+    ogg_int64_t dup_scale;
+    ogg_int64_t nframes_q8;
+    dup_scale=oc_bexp64((_rc->log_drop_scale
+     +oc_blog64(_rc->prev_drop_count+1)>>1)+OC_Q57(8));
+    /*Do the Q8 arithmetic in 64 bits: _nframes<<8 would overflow a 32-bit int
+       once _nframes reaches 2**23.*/
+    nframes_q8=(ogg_int64_t)_nframes<<8;
+    if(dup_scale>=nframes_q8)_nframes=!!_nframes;
+    else if(dup_scale>0)_nframes=(int)((nframes_q8+dup_scale-1)/dup_scale);
+  }
+  return _nframes;
+}
+/*Chooses the qi for the next frame of type _qti to meet the rate target.*/
+int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp){
+  ogg_int64_t  rate_total;
+  int          nframes[2]; /*Frame counts per type in the window.*/
+  int          buf_delay; /*Length of the window, in frames.*/
+  ogg_int64_t  log_qtarget;
+  ogg_int64_t  log_scale0;
+  int          old_qi;
+  int          qi;
+  /*Figure out how to re-distribute bits so that we hit our fullness target
+     before the last keyframe in our current buffer window (after the current
+     frame), or the end of the buffer window, whichever comes first.*/
+  switch(_enc->rc.twopass){
+    default:{
+      ogg_uint32_t next_key_frame;
+      /*Single pass mode: assume only forced keyframes and attempt to estimate
+         the drop count for VFR content.*/
+      next_key_frame=_qti?_enc->keyframe_frequency_force
+       -(_enc->state.curframe_num-_enc->state.keyframe_num):0;
+      nframes[0]=(_enc->rc.buf_delay-OC_MINI(next_key_frame,_enc->rc.buf_delay)
+       +_enc->keyframe_frequency_force-1)/_enc->keyframe_frequency_force;
+      if(nframes[0]+_qti>1){
+        nframes[0]--;
+        buf_delay=next_key_frame+nframes[0]*_enc->keyframe_frequency_force;
+      }
+      else buf_delay=_enc->rc.buf_delay;
+      nframes[1]=buf_delay-nframes[0];
+      /*Downgrade the delta frame rate to correspond to the recent drop count
+         history.*/
+      nframes[1]=oc_rc_scale_drop(&_enc->rc,nframes[1]);
+    }break;
+    case 1:{
+      /*Pass 1 mode: use the fixed qi value already in state.qis[0].*/
+      qi=_enc->state.qis[0];
+      _enc->rc.log_qtarget=_enc->log_qavg[_qti][qi];
+      return qi;
+    }break;
+    case 2:{
+      ogg_int64_t scale_sum[2];
+      int         qti;
+      int         buf_pad;
+      /*Pass 2 mode: we know exactly how much of each frame type there is in
+         the current buffer window, and have estimates for the scales.*/
+      nframes[0]=_enc->rc.nframes[0];
+      nframes[1]=_enc->rc.nframes[1];
+      scale_sum[0]=_enc->rc.scale_sum[0];
+      scale_sum[1]=_enc->rc.scale_sum[1];
+      /*The window size can be slightly larger than the buffer window for VFR
+         content; clamp it down, if appropriate (the excess will all be dup
+         frames).*/
+      buf_delay=OC_MINI(_enc->rc.scale_window_end-_enc->rc.scale_window0,
+       _enc->rc.buf_delay);
+      /*If we're approaching the end of the file, add some slack to keep us
+         from slamming into a rail.
+        Our rate accuracy goes down, but it keeps the result sensible.
+        We position the target where the first forced keyframe beyond the end
+         of the file would be (for consistency with 1-pass mode).*/
+      buf_pad=OC_MINI(_enc->rc.buf_delay,_enc->state.keyframe_num
+       +_enc->keyframe_frequency_force-_enc->rc.scale_window0);
+      if(buf_delay<buf_pad)buf_pad-=buf_delay;
+      else{
+        /*Otherwise, search for the last keyframe in the buffer window and
+           target that.*/
+        buf_pad=0;
+        /*TODO: Currently we only do this when using a finite buffer; we could
+           save the position of the last keyframe in the summary data and do it
+           with a whole-file buffer as well, but it isn't likely to make a
+           difference.*/
+        if(_enc->rc.frame_metrics!=NULL){
+          int fmi;
+          int fm_tail;
+          fm_tail=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics;
+          if(fm_tail>=_enc->rc.cframe_metrics)fm_tail-=_enc->rc.cframe_metrics;
+          for(fmi=fm_tail;;){
+            oc_frame_metrics *m;
+            fmi--;
+            if(fmi<0)fmi+=_enc->rc.cframe_metrics;
+            /*Stop before we remove the first frame.*/
+            if(fmi==_enc->rc.frame_metrics_head)break;
+            m=_enc->rc.frame_metrics+fmi;
+            /*If we find a keyframe, remove it and everything past it.*/
+            if(m->frame_type==OC_INTRA_FRAME){
+              do{
+                qti=m->frame_type;
+                nframes[qti]--;
+                scale_sum[qti]-=m->scale;
+                buf_delay-=m->dup_count+1;
+                fmi++;
+                if(fmi>=_enc->rc.cframe_metrics)fmi=0;
+                m=_enc->rc.frame_metrics+fmi;
+              }
+              while(fmi!=fm_tail);
+              /*And stop scanning backwards.*/
+              break;
+            }
+          }
+        }
+      }
+      /*If we're not using the same frame type as in pass 1 (because someone
+         changed the keyframe interval), remove that scale estimate.
+        We'll add in a replacement for the correct frame type below.*/
+      qti=_enc->rc.cur_metrics.frame_type;
+      if(qti!=_qti){
+        nframes[qti]--;
+        scale_sum[qti]-=_enc->rc.cur_metrics.scale;
+      }
+      /*Compute corrected log_scale estimates for each frame type from the
+         pass-1 scales we measured in the current window.*/
+      for(qti=0;qti<2;qti++){
+        oc_log_linear_fit *fit;
+        ogg_int64_t        x;
+        x=nframes[qti]>0?
+         oc_blog64(scale_sum[qti])-oc_blog64(nframes[qti])-OC_Q57(24):
+         -_enc->rc.log_npixels;
+        fit=_enc->rc.corr+qti;
+        if(fit->n>0){
+          ogg_int64_t  var;
+          ogg_uint32_t n_2;
+          n_2=fit->n>>1;
+          var=fit->x2;
+          var-=(fit->x+2048>>12)*(((fit->x+2048>>12)+n_2)/fit->n);
+          if(var>fit->n){
+            ogg_int64_t cov;
+            ogg_int64_t beta;
+            ogg_int64_t alpha;
+            ogg_int64_t y;
+            cov=fit->xy;
+            cov-=(fit->y+2048>>12)*(((fit->x+2048>>12)+n_2)/fit->n);
+            /*beta is Q33.*/
+            beta=((cov+n_2)/fit->n<<33)/((var+n_2)/fit->n);
+            /*alpha is Q57.*/
+            alpha=((fit->y+n_2)/fit->n<<33)-beta*((fit->x+n_2)/fit->n);
+            /*Predict the mean y from the mean x.
+              What we're really trying to compensate for is error in exp[], not
+               error in the scales, and hence we can apply the correction to
+               the mean scale instead of applying it to each pass-1 scale and
+               then taking the mean.*/
+            y=(x+((ogg_int64_t)1<<32)>>33)*beta+alpha;
+            /*If we have enough points for a good estimation, use the corrected
+               predictor value directly.*/
+            if(fit->n>=(128<<qti))x=y;
+            /*Otherwise interpolate between the two.*/
+            else x+=fit->n*(y-x>>7+qti);
+          }
+        }
+        _enc->rc.log_scale[qti]=x;
+      }
+      /*If we're not using the same frame type as in pass 1, add a scale
+         estimate for the corresponding frame using the current low-pass
+         filter value.
+        This is mostly to ensure we have a valid estimate even when pass 1 had
+         no frames of this type in the buffer window.
+        TODO: We could also plan ahead and figure out how many keyframes we'll
+         be forced to add in the current buffer window.*/
+      qti=_enc->rc.cur_metrics.frame_type;
+      if(qti!=_qti){
+        ogg_int64_t scale;
+        scale=oc_bexp_q24(_enc->rc.log_scale[_qti])*(ogg_int64_t)nframes[_qti];
+        nframes[_qti]++;
+        scale+=_enc->rc.scalefilter[_qti].y[0];
+        _enc->rc.log_scale[_qti]=oc_blog64(scale)-OC_Q57(24);
+      }
+      /*Add the padding from above.
+        This basically reverts to 1-pass estimations in the last keyframe
+         interval.*/
+      if(buf_pad>0){
+        ogg_int64_t scale;
+        int         nextra_frames;
+        /*Extend the buffer.*/
+        buf_delay+=buf_pad;
+        /*Add virtual delta frames according to the estimated drop count.*/
+        nextra_frames=oc_rc_scale_drop(&_enc->rc,buf_pad);
+        /*And blend in the low-pass filtered scale according to how many frames
+           we added.*/
+        scale=oc_bexp_q24(_enc->rc.log_scale[1])*(ogg_int64_t)nframes[1]
+         +_enc->rc.scalefilter[1].y[0]*(ogg_int64_t)nextra_frames;
+        nframes[1]+=nextra_frames;
+        _enc->rc.log_scale[1]=
+         oc_blog64((scale+(nframes[1]>>1))/nframes[1])-OC_Q57(24);
+      }
+    }break;
+  }
+  /*rate_total is the total bits available over the next buf_delay frames.*/
+  rate_total=_enc->rc.fullness-_enc->rc.target
+   +buf_delay*_enc->rc.bits_per_frame;
+  log_scale0=_enc->rc.log_scale[_qti]+_enc->rc.log_npixels;
+  /*If there aren't enough bits to achieve our desired fullness level, use the
+     minimum quality permitted.*/
+  if(rate_total<=buf_delay)log_qtarget=OC_QUANT_MAX_LOG;
+  else{
+    static const ogg_int64_t LOG_KEY_RATIO=0x0137222BB70747BALL;
+    ogg_int64_t log_scale1;
+    ogg_int64_t rlo; /*[rlo,rhi] bracket the per-frame bits for _qti.*/
+    ogg_int64_t rhi;
+    log_scale1=_enc->rc.log_scale[1-_qti]+_enc->rc.log_npixels;
+    rlo=0;
+    rhi=(rate_total+nframes[_qti]-1)/nframes[_qti];
+    while(rlo<rhi){
+      ogg_int64_t curr;
+      ogg_int64_t rdiff;
+      ogg_int64_t log_rpow;
+      ogg_int64_t rscale;
+      curr=rlo+rhi>>1;
+      log_rpow=oc_blog64(curr)-log_scale0;
+      log_rpow=(log_rpow+(_enc->rc.exp[_qti]>>1))/_enc->rc.exp[_qti];
+      if(_qti)log_rpow+=LOG_KEY_RATIO>>6;
+      else log_rpow-=LOG_KEY_RATIO>>6;
+      log_rpow*=_enc->rc.exp[1-_qti];
+      rscale=nframes[1-_qti]*oc_bexp64(log_scale1+log_rpow);
+      rdiff=nframes[_qti]*curr+rscale-rate_total;
+      if(rdiff<0)rlo=curr+1;
+      else if(rdiff>0)rhi=curr-1;
+      else break;
+    }
+    log_qtarget=OC_Q57(2)-((oc_blog64(rlo)-log_scale0+(_enc->rc.exp[_qti]>>1))/
+     _enc->rc.exp[_qti]<<6);
+    log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
+  }
+  /*The above allocation looks only at the total rate we'll accumulate in the
+     next buf_delay frames.
+    However, we could overflow the buffer on the very next frame, so check for
+     that here, if we're not using a soft target.*/
+  if(_enc->rc.cap_overflow){
+    ogg_int64_t margin;
+    ogg_int64_t soft_limit;
+    ogg_int64_t log_soft_limit;
+    ogg_int64_t log_qexp;
+    int         exp0;
+    /*Allow 3% of the buffer for prediction error.
+      This should be plenty, and we don't mind if we go a bit over; we only
+       want to keep these bits from being completely wasted.*/
+    margin=_enc->rc.max+31>>5;
+    /*We want to use at least this many bits next frame.*/
+    soft_limit=_enc->rc.fullness+_enc->rc.bits_per_frame-(_enc->rc.max-margin);
+    log_soft_limit=oc_blog64(soft_limit);
+    /*If we're predicting we won't use that many...*/
+    exp0=_enc->rc.exp[_qti];
+    log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
+    if(log_scale0-log_qexp<log_soft_limit){
+      /*Scale the adjustment based on how far into the margin we are.*/
+      log_qexp+=(log_scale0-log_soft_limit-log_qexp>>32)*
+       ((OC_MINI(margin,soft_limit)<<32)/margin);
+      log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2);
+    }
+  }
+  /*If _clamp is set (i.e., not an initial frame), limit the quality change.*/
+  old_qi=_enc->state.qis[0];
+  if(_clamp){
+    ogg_int64_t log_qmin;
+    ogg_int64_t log_qmax;
+    /*Clamp the target quantizer to within [0.8*Q,1.25*Q], where Q is the
+       current quantizer (the constant below is log2(1.25) in Q57).
+      TODO: With user-specified quant matrices, we need to enlarge these limits
+       if they don't actually let us change qi values.*/
+    log_qmin=_enc->log_qavg[_qti][old_qi]-0x00A4D3C25E68DC58LL;
+    log_qmax=_enc->log_qavg[_qti][old_qi]+0x00A4D3C25E68DC58LL;
+    log_qtarget=OC_CLAMPI(log_qmin,log_qtarget,log_qmax);
+  }
+  /*The above allocation looks only at the total rate we'll accumulate in the
+     next buf_delay frames.
+    However, we could bust the budget on the very next frame, so check for that
+     here, if we're not using a soft target.*/
+  if(!_enc->rc.cap_underflow||_enc->rc.drop_frames){
+    ogg_int64_t log_hard_limit;
+    ogg_int64_t log_qexp;
+    int         exp0;
+    /*Compute the maximum number of bits we can use in the next frame.
+      Allow 50% of the rate for a single frame for prediction error.
+      This may not be enough for keyframes or sudden changes in complexity.*/
+    log_hard_limit=oc_blog64(_enc->rc.fullness+(_enc->rc.bits_per_frame>>1));
+    /*If we're predicting we'll use more than this...*/
+    exp0=_enc->rc.exp[_qti];
+    log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
+    if(log_scale0-log_qexp>log_hard_limit){
+      /*Force the target to hit our limit exactly.*/
+      log_qexp=log_scale0-log_hard_limit;
+      log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2);
+      /*If that target is unreasonable, oh well; we'll have to drop.*/
+      log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
+    }
+  }
+  qi=oc_enc_find_qi_for_target(_enc,_qti,old_qi,
+   _enc->state.info.quality,log_qtarget);
+  /*Save the quantizer target for lambda calculations.*/
+  _enc->rc.log_qtarget=log_qtarget;
+  return qi;
+}
+/*Updates the rate controller after a frame is coded; returns 1 if dropped.*/
+int oc_enc_update_rc_state(oc_enc_ctx *_enc,
+ long _bits,int _qti,int _qi,int _trial,int _droppable){
+  ogg_int64_t buf_delta;
+  ogg_int64_t log_scale;
+  ogg_int32_t scale;
+  int         dropped;
+  dropped=0;
+  if(!_enc->rc.drop_frames)_droppable=0;
+  buf_delta=_enc->rc.bits_per_frame*(1+_enc->dup_count);
+  if(_bits<=0){
+    /*We didn't code any blocks in this frame.*/
+    log_scale=OC_Q57(-64);
+    _bits=0;
+    scale=0;
+  }
+  else{
+    ogg_int64_t log_bits;
+    ogg_int64_t log_qexp;
+    /*Compute the estimated scale factor for this frame type.*/
+    log_bits=oc_blog64(_bits);
+    log_qexp=_enc->rc.log_qtarget-OC_Q57(2);
+    log_qexp=(log_qexp>>6)*(_enc->rc.exp[_qti]);
+    log_scale=OC_MINI(log_bits-_enc->rc.log_npixels+log_qexp,OC_Q57(16));
+    scale=oc_bexp_q24(log_scale);
+  }
+  /*Special two-pass processing.*/
+  switch(_enc->rc.twopass){
+    case 1:{
+      /*Pass-1 mode: save the metrics for this frame.*/
+      _enc->rc.cur_metrics.scale=scale;
+      _enc->rc.cur_metrics.dup_count=_enc->dup_count;
+      _enc->rc.cur_metrics.frame_type=_enc->state.frame_type;
+      _enc->rc.twopass_buffer_bytes=0;
+    }break;
+    case 2:{
+      /*Pass 2 mode:*/
+      /*Accumulate statistics for estimation bias correction.
+        Everything is done in Q24 format.*/
+      if(_bits>0&&_enc->rc.cur_metrics.frame_type==_qti){
+        oc_log_linear_fit *fit;
+        ogg_int64_t        x;
+        ogg_int64_t        y;
+        x=oc_blog64(_enc->rc.cur_metrics.scale)-OC_Q57(24)>>33;
+        y=log_scale>>33;
+        fit=_enc->rc.corr+_qti;
+        /*Use long-term exponential moving averages for the fit statistics.*/
+        if(fit->n>=1000){
+          fit->n>>=1;
+          fit->x>>=1;
+          fit->y>>=1;
+          fit->x2>>=1;
+          fit->xy>>=1;
+        }
+        fit->n++;
+        fit->x+=x;
+        fit->y+=y;
+        fit->x2+=(x+2048>>12)*(x+2048>>12);
+        fit->xy+=(x+2048>>12)*(y+2048>>12);
+      }
+      if(!_trial){
+        ogg_int64_t next_frame_num;
+        int         qti;
+        /*Move the current metrics back one frame.*/
+        *&_enc->rc.prev_metrics=*&_enc->rc.cur_metrics;
+        next_frame_num=_enc->state.curframe_num+_enc->dup_count+1;
+        /*Back out the last frame's statistics from the sliding window.*/
+        qti=_enc->rc.prev_metrics.frame_type;
+        _enc->rc.frames_left[qti]--;
+        _enc->rc.frames_left[2]-=_enc->rc.prev_metrics.dup_count;
+        _enc->rc.nframes[qti]--;
+        _enc->rc.nframes[2]-=_enc->rc.prev_metrics.dup_count;
+        _enc->rc.scale_sum[qti]-=_enc->rc.prev_metrics.scale;
+        _enc->rc.scale_window0=(int)next_frame_num;
+        /*Free the corresponding entry in the circular buffer.*/
+        if(_enc->rc.frame_metrics!=NULL){
+          _enc->rc.nframe_metrics--;
+          _enc->rc.frame_metrics_head++;
+          if(_enc->rc.frame_metrics_head>=_enc->rc.cframe_metrics){
+            _enc->rc.frame_metrics_head=0;
+          }
+        }
+        /*Mark us ready for the next 2-pass packet.*/
+        _enc->rc.twopass_buffer_bytes=0;
+        /*Update state, so the user doesn't have to keep calling 2pass_in after
+           they've fed in all the data when we're using a finite buffer.*/
+        _enc->prev_dup_count=_enc->dup_count;
+        oc_enc_rc_2pass_in(_enc,NULL,0);
+      }
+    }break;
+  }
+  /*Common to all passes:*/
+  if(_bits>0){
+    /*Use the estimated scale factor directly if this was a trial, bypassing
+       the low-pass filter.*/
+    if(_trial)_enc->rc.log_scale[_qti]=log_scale;
+    else{
+      /*Otherwise update the low-pass scale filter for this frame type,
+         regardless of whether or not we dropped this frame.*/
+      _enc->rc.log_scale[_qti]=oc_blog64(oc_iir_filter_update(
+       _enc->rc.scalefilter+_qti,scale))-OC_Q57(24);
+      /*If this frame busts our budget, it must be dropped.*/
+      if(_droppable&&_enc->rc.fullness+buf_delta<_bits){
+        _enc->rc.prev_drop_count+=1+_enc->dup_count;
+        _bits=0;
+        dropped=1;
+      }
+      else{
+        ogg_uint32_t drop_count;
+        /*Update a low-pass filter to estimate the "real" frame rate taking
+           drops and duplicates into account.
+          This is only done if the frame is coded, as it needs the final
+           count of dropped frames.*/
+        drop_count=_enc->rc.prev_drop_count+1;
+        if(drop_count>0x7F)drop_count=0x7FFFFFFF;
+        else drop_count<<=24;
+        _enc->rc.log_drop_scale=oc_blog64(oc_iir_filter_update(
+         &_enc->rc.vfrfilter,drop_count))-OC_Q57(24);
+        /*Initialize the drop count for this frame to the user-requested dup
+           count.
+          It will be increased if we drop more frames.*/
+        _enc->rc.prev_drop_count=_enc->dup_count;
+      }
+    }
+  }
+  /*Increase the drop count.*/
+  else _enc->rc.prev_drop_count+=1+_enc->dup_count;
+  /*And update the buffer fullness level.*/
+  if(!_trial){
+    _enc->rc.fullness+=buf_delta-_bits;
+    /*If we're too quick filling the buffer and overflow is capped,
+      that rate is lost forever.*/
+    if(_enc->rc.cap_overflow&&_enc->rc.fullness>_enc->rc.max){
+      _enc->rc.fullness=_enc->rc.max;
+    }
+    /*If we're too quick draining the buffer and underflow is capped,
+      don't try to make up that rate later.*/
+    if(_enc->rc.cap_underflow&&_enc->rc.fullness<0){
+      _enc->rc.fullness=0;
+    }
+  }
+  return dropped;
+}
+
+#define OC_RC_2PASS_HDR_SZ    (38) /*Summary header: 4+4+3*4+2*1+2*8 bytes.*/
+#define OC_RC_2PASS_PACKET_SZ (8)  /*Per-frame packet: 4+4 bytes.*/
+
+/*Appends the low _bytes bytes of _val, least-significant byte first.*/
+static void oc_rc_buffer_val(oc_rc_state *_rc,ogg_int64_t _val,int _bytes){
+  for(;_bytes>0;_bytes--,_val>>=8){
+    _rc->twopass_buffer[_rc->twopass_buffer_bytes++]=(unsigned char)(_val&0xFF);
+  }
+}
+
+/*Returns the next chunk of pass-1 data in *_buf and its size in bytes.*/
+int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf){
+  oc_rc_state *rc;
+  rc=&_enc->rc;
+  if(rc->twopass_buffer_bytes!=0){
+    /*Unless the final summary is now due, this data was already retrieved.*/
+    if(_enc->packet_state!=OC_PACKET_DONE||
+     rc->twopass_buffer_bytes==OC_RC_2PASS_HDR_SZ){
+      *_buf=NULL;
+      return 0;
+    }
+    rc->twopass_buffer_bytes=0;
+    oc_rc_buffer_val(rc,0x5032544F,4);
+    oc_rc_buffer_val(rc,0,4);
+    oc_rc_buffer_val(rc,rc->frames_total[0],4);
+    oc_rc_buffer_val(rc,rc->frames_total[1],4);
+    oc_rc_buffer_val(rc,rc->frames_total[2],4);
+    oc_rc_buffer_val(rc,rc->exp[0],1);
+    oc_rc_buffer_val(rc,rc->exp[1],1);
+    oc_rc_buffer_val(rc,rc->scale_sum[0],8);
+    oc_rc_buffer_val(rc,rc->scale_sum[1],8);
+  }
+  else if(rc->twopass!=0){
+    int qti;
+    /*Add the current frame's metrics to the totals and emit its packet.*/
+    qti=rc->cur_metrics.frame_type;
+    rc->scale_sum[qti]+=rc->cur_metrics.scale;
+    rc->frames_total[qti]++;
+    rc->frames_total[2]+=rc->cur_metrics.dup_count;
+    oc_rc_buffer_val(rc,
+     rc->cur_metrics.dup_count|rc->cur_metrics.frame_type<<31,4);
+    oc_rc_buffer_val(rc,rc->cur_metrics.scale,4);
+  }
+  else{
+    int qi;
+    /*Not yet in 2-pass mode: enter pass 1 and emit a placeholder summary.*/
+    qi=oc_enc_select_qi(_enc,0,0);
+    _enc->state.nqis=1;
+    _enc->state.qis[0]=qi;
+    rc->twopass=1;
+    rc->frames_total[0]=rc->frames_total[1]=rc->frames_total[2]=0;
+    rc->scale_sum[0]=rc->scale_sum[1]=0;
+    oc_rc_buffer_val(rc,0x5032544F,4);
+    oc_rc_buffer_val(rc,0,4);
+    oc_rc_buffer_val(rc,0,OC_RC_2PASS_HDR_SZ-8);
+  }
+  *_buf=rc->twopass_buffer;
+  return rc->twopass_buffer_bytes;
+}
+/*Buffers input until _goal bytes are held; returns the new _consumed.*/
+static size_t oc_rc_buffer_fill(oc_rc_state *_rc,
+ unsigned char *_buf,size_t _bytes,size_t _consumed,size_t _goal){
+  for(;_rc->twopass_buffer_fill<_goal&&_consumed<_bytes;_consumed++){
+    _rc->twopass_buffer[_rc->twopass_buffer_fill++]=_buf[_consumed];
+  }
+  return _consumed;
+}
+
+/*Reads a _bytes-byte value from the buffer, advancing twopass_buffer_bytes.*/
+static ogg_int64_t oc_rc_unbuffer_val(oc_rc_state *_rc,int _bytes){
+  ogg_int64_t val;
+  int         bi;
+  val=0;
+  /*The bytes are stored least-significant first.*/
+  for(bi=0;bi<_bytes;bi++){
+    val|=(ogg_int64_t)_rc->twopass_buffer[_rc->twopass_buffer_bytes++]<<(8*bi);
+  }
+  return val;
+}
+/*Feeds pass-1 data into pass 2; returns bytes consumed or a TH_* error.*/
+int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes){
+  size_t consumed;
+  consumed=0;
+  /*Enable pass 2 mode if this is the first call.*/
+  if(_enc->rc.twopass==0){
+    _enc->rc.twopass=2;
+    _enc->rc.twopass_buffer_fill=0;
+    _enc->rc.frames_total[0]=0;
+    _enc->rc.nframe_metrics=0;
+    _enc->rc.cframe_metrics=0;
+    _enc->rc.frame_metrics_head=0;
+    _enc->rc.scale_window0=0;
+    _enc->rc.scale_window_end=0;
+  }
+  /*If we haven't got a valid summary header yet, try to parse one.*/
+  if(_enc->rc.frames_total[0]==0){
+    if(!_buf){
+      int frames_needed;
+      /*With no input, report how many bytes we want: the header plus one frame
+         for a whole-file buffer, or up to one frame per buffer slot.*/
+      frames_needed=_enc->rc.frame_metrics==NULL?1:_enc->rc.buf_delay;
+      return OC_RC_2PASS_HDR_SZ+frames_needed*OC_RC_2PASS_PACKET_SZ
+       -_enc->rc.twopass_buffer_fill;
+    }
+    consumed=oc_rc_buffer_fill(&_enc->rc,
+     _buf,_bytes,consumed,OC_RC_2PASS_HDR_SZ);
+    if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_HDR_SZ){
+      ogg_int64_t scale_sum[2];
+      int         exp[2];
+      int         buf_delay;
+      /*Read the summary header data.*/
+      /*Check the magic value and version number.*/
+      if(oc_rc_unbuffer_val(&_enc->rc,4)!=0x5032544F||
+       oc_rc_unbuffer_val(&_enc->rc,4)!=0){
+        _enc->rc.twopass_buffer_bytes=0;
+        return TH_ENOTFORMAT;
+      }
+      _enc->rc.frames_total[0]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
+      _enc->rc.frames_total[1]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
+      _enc->rc.frames_total[2]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
+      exp[0]=(int)oc_rc_unbuffer_val(&_enc->rc,1);
+      exp[1]=(int)oc_rc_unbuffer_val(&_enc->rc,1);
+      scale_sum[0]=oc_rc_unbuffer_val(&_enc->rc,8);
+      scale_sum[1]=oc_rc_unbuffer_val(&_enc->rc,8);
+      /*Make sure the file claims to have at least one frame.
+        Otherwise we probably got the placeholder data from an aborted pass 1.
+        Also make sure the total frame count doesn't overflow an integer.*/
+      buf_delay=_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
+       +_enc->rc.frames_total[2];
+      if(_enc->rc.frames_total[0]==0||buf_delay<0||
+       (ogg_uint32_t)buf_delay<_enc->rc.frames_total[0]||
+       (ogg_uint32_t)buf_delay<_enc->rc.frames_total[1]){
+        _enc->rc.frames_total[0]=0;
+        _enc->rc.twopass_buffer_bytes=0;
+        return TH_EBADHEADER;
+      }
+      /*Got a valid header; set up pass 2.*/
+      _enc->rc.frames_left[0]=_enc->rc.frames_total[0];
+      _enc->rc.frames_left[1]=_enc->rc.frames_total[1];
+      _enc->rc.frames_left[2]=_enc->rc.frames_total[2];
+      /*If the user hasn't specified a buffer size, use the whole file.*/
+      if(_enc->rc.frame_metrics==NULL){
+        _enc->rc.buf_delay=buf_delay;
+        _enc->rc.nframes[0]=_enc->rc.frames_total[0];
+        _enc->rc.nframes[1]=_enc->rc.frames_total[1];
+        _enc->rc.nframes[2]=_enc->rc.frames_total[2];
+        _enc->rc.scale_sum[0]=scale_sum[0];
+        _enc->rc.scale_sum[1]=scale_sum[1];
+        _enc->rc.scale_window_end=buf_delay;
+        oc_enc_rc_reset(_enc);
+      }
+      _enc->rc.exp[0]=exp[0];
+      _enc->rc.exp[1]=exp[1];
+      memset(_enc->rc.corr,0,sizeof(_enc->rc.corr));
+      /*Clear the header data from the buffer to make room for packet data.*/
+      _enc->rc.twopass_buffer_fill=0;
+      _enc->rc.twopass_buffer_bytes=0;
+    }
+  }
+  if(_enc->rc.frames_total[0]!=0){
+    ogg_int64_t curframe_num;
+    int         nframes_total;
+    curframe_num=_enc->state.curframe_num;
+    if(curframe_num>=0){
+      /*We just encoded a frame; make sure things matched.*/
+      if(_enc->rc.prev_metrics.dup_count!=_enc->prev_dup_count){
+        _enc->rc.twopass_buffer_bytes=0;
+        return TH_EINVAL;
+      }
+    }
+    curframe_num+=_enc->prev_dup_count+1;
+    nframes_total=_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
+     +_enc->rc.frames_total[2];
+    if(curframe_num>=nframes_total){
+      /*We don't want any more data after the last frame, and we don't want to
+         allow any more frames to be encoded.*/
+      _enc->rc.twopass_buffer_bytes=0;
+    }
+    else if(_enc->rc.twopass_buffer_bytes==0){
+      if(_enc->rc.frame_metrics==NULL){
+        /*We're using a whole-file buffer:*/
+        if(!_buf)return OC_RC_2PASS_PACKET_SZ-_enc->rc.twopass_buffer_fill;
+        consumed=oc_rc_buffer_fill(&_enc->rc,
+         _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ);
+        if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){
+          ogg_uint32_t dup_count;
+          ogg_int32_t  scale;
+          int          qti;
+          int          arg;
+          /*Read the metrics for the next frame.*/
+          dup_count=oc_rc_unbuffer_val(&_enc->rc,4);
+          scale=oc_rc_unbuffer_val(&_enc->rc,4);
+          _enc->rc.cur_metrics.scale=scale;
+          qti=(dup_count&0x80000000)>>31;
+          _enc->rc.cur_metrics.dup_count=dup_count&0x7FFFFFFF;
+          _enc->rc.cur_metrics.frame_type=qti;
+          _enc->rc.twopass_force_kf=qti==OC_INTRA_FRAME;
+          /*"Helpfully" set the dup count back to what it was in pass 1.*/
+          arg=_enc->rc.cur_metrics.dup_count;
+          th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg));
+          /*Clear the buffer for the next frame.*/
+          _enc->rc.twopass_buffer_fill=0;
+        }
+      }
+      else{
+        int frames_needed;
+        /*We're using a finite buffer:*/
+        frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay
+         -(_enc->rc.scale_window_end-_enc->rc.scale_window0),
+         _enc->rc.frames_left[0]+_enc->rc.frames_left[1]
+         -_enc->rc.nframes[0]-_enc->rc.nframes[1]);
+        while(frames_needed>0){
+          if(!_buf){
+            return OC_RC_2PASS_PACKET_SZ*frames_needed
+           -_enc->rc.twopass_buffer_fill;
+          }
+          consumed=oc_rc_buffer_fill(&_enc->rc,
+           _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ);
+          if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){
+            oc_frame_metrics *m;
+            int               fmi;
+            ogg_uint32_t      dup_count;
+            ogg_int32_t       scale;
+            int               qti;
+            /*Read the metrics for the next frame.*/
+            dup_count=oc_rc_unbuffer_val(&_enc->rc,4);
+            scale=oc_rc_unbuffer_val(&_enc->rc,4);
+            /*Add them to the circular buffer.*/
+            fmi=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics++;
+            if(fmi>=_enc->rc.cframe_metrics)fmi-=_enc->rc.cframe_metrics;
+            m=_enc->rc.frame_metrics+fmi;
+            m->scale=scale;
+            qti=(dup_count&0x80000000)>>31;
+            m->dup_count=dup_count&0x7FFFFFFF;
+            m->frame_type=qti;
+            /*And accumulate the statistics over the window.*/
+            _enc->rc.nframes[qti]++;
+            _enc->rc.nframes[2]+=m->dup_count;
+            _enc->rc.scale_sum[qti]+=m->scale;
+            _enc->rc.scale_window_end+=m->dup_count+1;
+            /*Compute an upper bound on the number of remaining packets needed
+               for the current window.*/
+            frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay
+             -(_enc->rc.scale_window_end-_enc->rc.scale_window0),
+             _enc->rc.frames_left[0]+_enc->rc.frames_left[1]
+             -_enc->rc.nframes[0]-_enc->rc.nframes[1]);
+            /*Clear the buffer for the next frame.*/
+            _enc->rc.twopass_buffer_fill=0;
+            _enc->rc.twopass_buffer_bytes=0;
+          }
+          /*Go back for more data.*/
+          else break;
+        }
+        /*If we've got all the frames we need, fill in the current metrics.
+          We're ready to go.*/
+        if(frames_needed<=0){
+          int arg;
+          *&_enc->rc.cur_metrics=
+           *(_enc->rc.frame_metrics+_enc->rc.frame_metrics_head);
+          _enc->rc.twopass_force_kf=
+           _enc->rc.cur_metrics.frame_type==OC_INTRA_FRAME;
+          /*"Helpfully" set the dup count back to what it was in pass 1.*/
+          arg=_enc->rc.cur_metrics.dup_count;
+          th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg));
+          /*Mark us ready for the next frame.*/
+          _enc->rc.twopass_buffer_bytes=1;
+        }
+      }
+    }
+  }
+  return (int)consumed;
+}

Copied: trunk/theora/lib/state.c (from rev 16442, trunk/theora/lib/dec/state.c)
===================================================================
--- trunk/theora/lib/state.c	                        (rev 0)
+++ trunk/theora/lib/state.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,1227 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include "internal.h"
+#if defined(OC_X86_ASM)
+#if defined(_MSC_VER)
+# include "x86_vc/x86int.h"
+#else
+# include "x86/x86int.h"
+#endif
+#endif
+#if defined(OC_DUMP_IMAGES)
+# include <stdio.h>
+# include "png.h"
+#endif
+
+/*Looks up the fragment index of the upper-left block of one macro block
+   (quadrant) of a super block.
+  A negative result means that block, and hence the upper-left corner of the
+   macro block, lies outside the coded frame, so this doubles as a validity
+   test for the macro block.
+  _sb_map: The super block map.
+  _quadi:  The quadrant number.
+  Return: The index of the fragment of the upper left block in the macro
+   block, or -1 if the block lies outside the coded frame.*/
+static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
+  int bi;
+  /*In the Hilbert curve ordering used for super blocks, the upper-left block
+     is entry 0 of quadrants 0, 1 and 2, and entry 2 of quadrant 3.
+    The expression below yields 2 only when both low bits of _quadi are set.*/
+  bi=_quadi&(_quadi<<1);
+  return _sb_map[_quadi][bi];
+}
+
+/*Builds the block-position-to-fragment-number maps of every super block in
+   a single color plane, and records which quadrants of each super block are
+   valid.
+  _sb_maps:  The array of super block maps for the color plane.
+  _sb_flags: The array of super block flags for the color plane.
+  _frag0:    The index of the first fragment in the plane.
+  _hfrags:   The number of horizontal fragments in a coded frame.
+  _vfrags:   The number of vertical fragments in a coded frame.*/
+static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
+ oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
+  /*For each (row,column) position in a 4x4 grid of fragments, the
+     (macro_block,block) pair it maps to.
+    The positions are visited along a 4x4 Hilbert space-filling curve, so
+     that consecutive blocks in coding order are always immediate spatial
+     neighbors and runs of blocks tend to be well correlated.*/
+  static const int SB_MAP[4][4][2]={
+    {{0,0},{0,1},{3,2},{3,3}},
+    {{0,3},{0,2},{3,1},{3,0}},
+    {{1,0},{1,3},{2,0},{2,3}},
+    {{1,1},{1,2},{2,1},{2,2}}
+  };
+  ptrdiff_t row_frag0;
+  unsigned  sbi;
+  int       sby;
+  sbi=0;
+  row_frag0=_frag0;
+  for(sby=0;sby<_vfrags;sby+=4){
+    int nrows;
+    int sbx;
+    /*The number of rows of blocks in this row of super blocks that lie
+       within the image.*/
+    nrows=_vfrags-sby;
+    if(nrows>4)nrows=4;
+    for(sbx=0;sbx<_hfrags;sbx+=4){
+      ptrdiff_t fragi;
+      int       ncols;
+      int       qi;
+      int       bi;
+      /*The number of columns of blocks in this super block that lie within
+         the image.*/
+      ncols=_hfrags-sbx;
+      if(ncols>4)ncols=4;
+      /*Start with every entry set to -1 (all bits set).*/
+      memset(_sb_maps[sbi][0],0xFF,sizeof(_sb_maps[sbi]));
+      /*Number the blocks that are actually inside the image.*/
+      fragi=row_frag0+sbx;
+      for(bi=0;bi<nrows;bi++){
+        int bj;
+        for(bj=0;bj<ncols;bj++){
+          const int *pos;
+          pos=SB_MAP[bi][bj];
+          _sb_maps[sbi][pos[0]][pos[1]]=fragi+bj;
+        }
+        fragi+=_hfrags;
+      }
+      /*A quadrant is valid if its upper-left block is inside the image.*/
+      for(qi=0;qi<4;qi++){
+        if(oc_sb_quad_top_left_frag(_sb_maps[sbi],qi)>=0){
+          _sb_flags[sbi].quad_valid|=1<<qi;
+        }
+      }
+      sbi++;
+    }
+    row_frag0+=_hfrags<<2;
+  }
+}
+
+/*Computes the four luma fragment indices of a macro block from the fragment
+   coordinates of its upper-left hand corner.
+  Entry k of the map describes the block in row k>>1, column k&1 of the macro
+   block.
+  _mb_map: The macro block map to fill.
+  _fplane: The description of the Y plane.
+  _xfrag0: The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
+  ptrdiff_t nhfrags;
+  int       k;
+  nhfrags=_fplane->nhfrags;
+  for(k=0;k<4;k++){
+    _mb_map[0][k]=(_yfrag0+(k>>1))*nhfrags+_xfrag0+(k&1);
+  }
+}
+
+/*Computes the chroma fragment indices of a macro block when chroma is
+   decimated in both the X and Y directions (4:2:0).
+  Each chroma plane contributes a single fragment, stored in entry 0.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t fragi;
+  int       pli;
+  /*Halve both luma coordinates to get the position within a chroma plane.*/
+  fragi=(_yfrag0>>1)*(ptrdiff_t)_fplanes[1].nhfrags+(_xfrag0>>1);
+  for(pli=1;pli<3;pli++)_mb_map[pli][0]=fragi+_fplanes[pli].froffset;
+}
+
+/*Computes the chroma fragment indices of a macro block when chroma is
+   decimated in the Y direction only.
+  Each chroma plane contributes two horizontally adjacent fragments, stored
+   in entries 0 and 1.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t left;
+  int       pli;
+  /*Only the Y coordinate is halved.*/
+  left=(_yfrag0>>1)*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
+  for(pli=1;pli<3;pli++){
+    ptrdiff_t froffset;
+    froffset=_fplanes[pli].froffset;
+    _mb_map[pli][0]=left+froffset;
+    _mb_map[pli][1]=left+1+froffset;
+  }
+}
+
+/*Computes the chroma fragment indices of a macro block when chroma is
+   decimated in the X direction only (4:2:2).
+  Each chroma plane contributes two vertically adjacent fragments, stored in
+   entries 0 and 2.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t top;
+  int       pli;
+  /*Only the X coordinate is halved.*/
+  top=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+(_xfrag0>>1);
+  for(pli=1;pli<3;pli++){
+    ptrdiff_t froffset;
+    froffset=_fplanes[pli].froffset;
+    _mb_map[pli][0]=top+froffset;
+    /*The second fragment is one chroma fragment row further on.*/
+    _mb_map[pli][2]=top+_fplanes[1].nhfrags+froffset;
+  }
+}
+
+/*Fills in the chroma plane fragment maps for a macro block.
+  This version is for use with no chroma decimation (4:4:4).
+  This uses the already filled-in luma plane values.
+  The fragment coordinates are accepted, but ignored, so that this function
+   has exactly the oc_mb_fill_cmapping_func signature: calling a function
+   through a pointer to an incompatible function type is undefined behavior
+   in C, and traps on targets that check indirect call signatures.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  Unused.
+  _yfrag0:  Unused.*/
+static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  int k;
+  (void)_xfrag0;
+  (void)_yfrag0;
+  /*With no decimation, each chroma fragment sits at the same position as the
+     corresponding luma fragment, just offset to the start of its plane.*/
+  for(k=0;k<4;k++){
+    _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
+    _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
+  }
+}
+
+/*The function type used to fill in the chroma plane fragment maps for a
+   macro block.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
+
+/*A table of functions used to fill in the chroma plane fragment maps for a
+   macro block for each type of chrominance decimation.
+  It is indexed by the pixel format value.*/
+static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
+  oc_mb_fill_cmapping00,
+  oc_mb_fill_cmapping01,
+  oc_mb_fill_cmapping10,
+  oc_mb_fill_cmapping11
+};
+
+/*Builds the map from every macro block to the fragments it contains in each
+   plane.
+  Macro blocks are numbered four per luma super block; those that fall
+   outside the coded frame keep a map of all -1's and are flagged as invalid.
+  _mb_maps:   The list of macro block maps.
+  _mb_modes:  The list of macro block modes; macro blocks completely outside
+               the coded region are marked invalid.
+  _fplanes:   The descriptions of the fragment planes.
+  _pixel_fmt: The chroma decimation type.*/
+static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
+ signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
+  oc_mb_fill_cmapping_func  fill_chroma;
+  unsigned                  sbi;
+  int                       nhfrags;
+  int                       nvfrags;
+  int                       y;
+  fill_chroma=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
+  nhfrags=_fplanes[0].nhfrags;
+  nvfrags=_fplanes[0].nvfrags;
+  sbi=0;
+  /*Visit the luma plane super blocks in raster order.*/
+  for(y=0;y<nvfrags;y+=4){
+    int x;
+    for(x=0;x<nhfrags;x+=4){
+      int quadi;
+      /*Visit the four macro blocks of this super block in display order:
+         left to right, then on to the next row.*/
+      for(quadi=0;quadi<4;quadi++){
+        unsigned mbi;
+        int      xmb;
+        int      ymb;
+        int      mbx;
+        int      mby;
+        ymb=quadi>>1;
+        xmb=quadi&1;
+        mbi=sbi<<2|OC_MB_MAP[ymb][xmb];
+        mbx=x|(xmb<<1);
+        mby=y|(ymb<<1);
+        /*Start with every fragment index set to -1.*/
+        memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
+        if(mbx<nhfrags&&mby<nvfrags){
+          /*The macro block is inside the encoded region: fill in the luma
+             plane first, as the 4:4:4 chroma mapping is derived from it.*/
+          oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
+          (*fill_chroma)(_mb_maps[mbi],_fplanes,mbx,mby);
+        }
+        else _mb_modes[mbi]=OC_MODE_INVALID;
+      }
+      sbi++;
+    }
+  }
+}
+
+/*Marks the fragments which fall all or partially outside the displayable
+   region of the frame.
+  Fragments completely outside the region get their invalid flag set.
+  Fragments that straddle its edge get borderi set to the index of an entry
+   in _state->borders holding a 64-bit mask of the pixels that are inside
+   (bit i<<3|j for row i, column j) and a count of those pixels; entries are
+   shared between fragments with the same mask.
+  All remaining fragments get a borderi of -1.
+  _state: The Theora state containing the fragments to be marked.*/
+static void oc_state_border_init(oc_theora_state *_state){
+  oc_fragment       *frag;
+  oc_fragment       *yfrag_end;
+  oc_fragment       *xfrag_end;
+  oc_fragment_plane *fplane;
+  int                crop_x0;
+  int                crop_y0;
+  int                crop_xf;
+  int                crop_yf;
+  int                pli;
+  int                y;
+  int                x;
+  /*The method we use here is slow, but the code is dead simple and handles
+     all the special cases easily.
+    We only ever need to do it once.*/
+  /*Loop through the fragments, marking those completely outside the
+     displayable region and constructing a border mask for those that straddle
+     the border.*/
+  _state->nborders=0;
+  /*frag walks the whole fragment array once; yfrag_end and xfrag_end mark
+     the end of the current plane and of the current fragment row.*/
+  yfrag_end=frag=_state->frags;
+  for(pli=0;pli<3;pli++){
+    fplane=_state->fplanes+pli;
+    /*Set up the cropping rectangle for this plane.*/
+    crop_x0=_state->info.pic_x;
+    crop_xf=_state->info.pic_x+_state->info.pic_width;
+    crop_y0=_state->info.pic_y;
+    crop_yf=_state->info.pic_y+_state->info.pic_height;
+    if(pli>0){
+      /*Scale the rectangle for decimated chroma planes: the start rounds
+         down and the end rounds up.*/
+      if(!(_state->info.pixel_fmt&1)){
+        crop_x0=crop_x0>>1;
+        crop_xf=crop_xf+1>>1;
+      }
+      if(!(_state->info.pixel_fmt&2)){
+        crop_y0=crop_y0>>1;
+        crop_yf=crop_yf+1>>1;
+      }
+    }
+    /*x and y are the pixel coordinates of the upper-left corner of the
+       current fragment within the plane.*/
+    y=0;
+    for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
+      x=0;
+      for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
+        /*First check to see if this fragment is completely outside the
+           displayable region.*/
+        /*Note the special checks for an empty cropping rectangle.
+          This guarantees that if we count a fragment as straddling the
+           border below, at least one pixel in the fragment will be inside
+           the displayable region.*/
+        /*Note that borderi is not written for fragments marked invalid
+           here.*/
+        if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
+         crop_x0>=crop_xf||crop_y0>=crop_yf){
+          frag->invalid=1;
+        }
+        /*Otherwise, check to see if it straddles the border.*/
+        else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
+         y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
+          ogg_int64_t mask;
+          int         npixels;
+          int         i;
+          /*Test each of the 64 pixels against the cropping rectangle.*/
+          mask=npixels=0;
+          for(i=0;i<8;i++){
+            int j;
+            for(j=0;j<8;j++){
+              if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
+                mask|=(ogg_int64_t)1<<(i<<3|j);
+                npixels++;
+              }
+            }
+          }
+          /*Search the fragment array for border info with the same pattern.
+            In general, there will be at most 8 different patterns (per
+             plane).*/
+          /*If no existing entry matches, a new one is appended.
+            NOTE(review): the capacity of _state->borders is not checked
+             here; confirm it is large enough for every pattern that can
+             occur.*/
+          for(i=0;;i++){
+            if(i>=_state->nborders){
+              _state->nborders++;
+              _state->borders[i].mask=mask;
+              _state->borders[i].npixels=npixels;
+            }
+            else if(_state->borders[i].mask!=mask)continue;
+            frag->borderi=i;
+            break;
+          }
+        }
+        /*The fragment lies entirely inside the displayable region.*/
+        else frag->borderi=-1;
+      }
+    }
+  }
+}
+
+/*Computes the fragment, super block and macro block layout of the frame,
+   allocates the corresponding arrays, and fills in the super block and macro
+   block to fragment mappings and the per-fragment border information.
+  _state: The state to initialize; _state->info must already be filled in.
+  Return: 0 on success, TH_EIMPL if one of the counts overflows its type, or
+   TH_EFAULT if an allocation fails.
+  On TH_EFAULT the arrays that were successfully allocated are left in _state;
+   the caller releases them with oc_state_frarray_clear().*/
+static int oc_state_frarray_init(oc_theora_state *_state){
+  int       yhfrags;
+  int       yvfrags;
+  int       chfrags;
+  int       cvfrags;
+  ptrdiff_t yfrags;
+  ptrdiff_t cfrags;
+  ptrdiff_t nfrags;
+  unsigned  yhsbs;
+  unsigned  yvsbs;
+  unsigned  chsbs;
+  unsigned  cvsbs;
+  unsigned  ysbs;
+  unsigned  csbs;
+  unsigned  nsbs;
+  size_t    nmbs;
+  int       hdec;
+  int       vdec;
+  int       pli;
+  /*Figure out the number of fragments in each plane.*/
+  /*These parameters have already been validated to be multiples of 16.*/
+  yhfrags=_state->info.frame_width>>3;
+  yvfrags=_state->info.frame_height>>3;
+  /*hdec and vdec are 1 if chroma is decimated in that direction, else 0.*/
+  hdec=!(_state->info.pixel_fmt&1);
+  vdec=!(_state->info.pixel_fmt&2);
+  chfrags=yhfrags+hdec>>hdec;
+  cvfrags=yvfrags+vdec>>vdec;
+  yfrags=yhfrags*(ptrdiff_t)yvfrags;
+  cfrags=chfrags*(ptrdiff_t)cvfrags;
+  nfrags=yfrags+2*cfrags;
+  /*Figure out the number of super blocks in each plane.*/
+  /*A super block is 4x4 fragments, so round up to a multiple of 4.*/
+  yhsbs=yhfrags+3>>2;
+  yvsbs=yvfrags+3>>2;
+  chsbs=chfrags+3>>2;
+  cvsbs=cvfrags+3>>2;
+  ysbs=yhsbs*yvsbs;
+  csbs=chsbs*cvsbs;
+  nsbs=ysbs+2*csbs;
+  /*There are four macro blocks in every luma super block.*/
+  nmbs=(size_t)ysbs<<2;
+  /*Check for overflow.
+    We support the ridiculous upper limits of the specification (1048560 by
+     1048560, or 3 TB frames) if the target architecture has 64-bit pointers,
+     but for those with 32-bit pointers (or smaller!) we have to check.
+    If the caller wants to prevent denial-of-service by imposing a more
+     reasonable upper limit on the size of attempted allocations, they must do
+     so themselves; we have no platform independent way to determine how much
+     system memory there is nor an application-independent way to decide what a
+     "reasonable" allocation is.*/
+  if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags||
+   ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){
+    return TH_EIMPL;
+  }
+  /*Initialize the fragment array.*/
+  /*The planes are stored back to back: luma first, then the two chroma
+     planes, which share the same dimensions.*/
+  _state->fplanes[0].nhfrags=yhfrags;
+  _state->fplanes[0].nvfrags=yvfrags;
+  _state->fplanes[0].froffset=0;
+  _state->fplanes[0].nfrags=yfrags;
+  _state->fplanes[0].nhsbs=yhsbs;
+  _state->fplanes[0].nvsbs=yvsbs;
+  _state->fplanes[0].sboffset=0;
+  _state->fplanes[0].nsbs=ysbs;
+  _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags;
+  _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags;
+  _state->fplanes[1].froffset=yfrags;
+  _state->fplanes[2].froffset=yfrags+cfrags;
+  _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags;
+  _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs;
+  _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs;
+  _state->fplanes[1].sboffset=ysbs;
+  _state->fplanes[2].sboffset=ysbs+csbs;
+  _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs;
+  /*NOTE(review): the element-count-times-size products passed to
+     _ogg_malloc() below are not themselves checked for overflow; confirm the
+     checks above are sufficient on 32-bit targets.*/
+  _state->nfrags=nfrags;
+  _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags));
+  _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs));
+  _state->nsbs=nsbs;
+  _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps));
+  _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags));
+  _state->nhmbs=yhsbs<<1;
+  _state->nvmbs=yvsbs<<1;
+  _state->nmbs=nmbs;
+  _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
+  _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
+  _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
+  /*All the allocations are attempted before any is checked, so every pointer
+     is either valid or NULL when we bail out.*/
+  if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL||
+   _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL||
+   _state->coded_fragis==NULL){
+    return TH_EFAULT;
+  }
+  /*Create the mapping from super blocks to fragments.*/
+  for(pli=0;pli<3;pli++){
+    oc_fragment_plane *fplane;
+    fplane=_state->fplanes+pli;
+    oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset,
+     _state->sb_flags+fplane->sboffset,fplane->froffset,
+     fplane->nhfrags,fplane->nvfrags);
+  }
+  /*Create the mapping from macro blocks to fragments.*/
+  oc_mb_create_mapping(_state->mb_maps,_state->mb_modes,
+   _state->fplanes,_state->info.pixel_fmt);
+  /*Initialize the invalid and borderi fields of each fragment.*/
+  oc_state_border_init(_state);
+  return 0;
+}
+
+/*Frees the arrays allocated by oc_state_frarray_init().
+  The pointers stored in _state are not reset afterwards.
+  _state: The state whose fragment, super block and macro block arrays are to
+           be released.*/
+static void oc_state_frarray_clear(oc_theora_state *_state){
+  _ogg_free(_state->coded_fragis);
+  _ogg_free(_state->mb_modes);
+  _ogg_free(_state->mb_maps);
+  _ogg_free(_state->sb_flags);
+  _ogg_free(_state->sb_maps);
+  _ogg_free(_state->frag_mvs);
+  _ogg_free(_state->frags);
+}
+
+
+/*Initializes the buffers used for reconstructed frames.
+  These buffers are padded with 16 extra pixels on each side, to allow
+   unrestricted motion vectors without special casing the boundary.
+  If chroma is decimated in either direction, the padding is reduced by a
+   factor of 2 on the appropriate sides.
+  All the reference frames live in a single allocation, and a table of the
+   offset of each fragment within a reference frame is built alongside it.
+  _state: The state to set up; its info and fragment planes must already be
+           initialized.
+  _nrefs: The number of reference buffers to init; must be 3 or 4.
+  Return: 0 on success, TH_EINVAL if _nrefs is out of range, TH_EIMPL if the
+   buffer sizes overflow, or TH_EFAULT if an allocation fails.
+  On failure nothing remains allocated, and _state->frag_buf_offs is never
+   left pointing at freed memory.*/
+static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
+  th_info       *info;
+  unsigned char *ref_frame_data;
+  size_t         ref_frame_data_sz;
+  size_t         ref_frame_sz;
+  size_t         yplane_sz;
+  size_t         cplane_sz;
+  int            yhstride;
+  int            yheight;
+  int            chstride;
+  int            cheight;
+  ptrdiff_t      yoffset;
+  ptrdiff_t      coffset;
+  ptrdiff_t     *frag_buf_offs;
+  ptrdiff_t      fragi;
+  int            hdec;
+  int            vdec;
+  int            rfi;
+  int            pli;
+  if(_nrefs<3||_nrefs>4)return TH_EINVAL;
+  info=&_state->info;
+  /*Compute the image buffer parameters for each plane.*/
+  hdec=!(info->pixel_fmt&1);
+  vdec=!(info->pixel_fmt&2);
+  yhstride=info->frame_width+2*OC_UMV_PADDING;
+  yheight=info->frame_height+2*OC_UMV_PADDING;
+  chstride=yhstride>>hdec;
+  cheight=yheight>>vdec;
+  yplane_sz=yhstride*(size_t)yheight;
+  cplane_sz=chstride*(size_t)cheight;
+  /*The offsets of the first visible pixel from the start of each padded
+     plane.*/
+  yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
+  coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
+  ref_frame_sz=yplane_sz+2*cplane_sz;
+  ref_frame_data_sz=_nrefs*ref_frame_sz;
+  /*Check for overflow.
+    The same caveats apply as for oc_state_frarray_init().*/
+  if(yplane_sz/yhstride!=yheight||2*cplane_sz<cplane_sz||
+   ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
+    return TH_EIMPL;
+  }
+  ref_frame_data=_ogg_malloc(ref_frame_data_sz);
+  frag_buf_offs=_ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
+  if(ref_frame_data==NULL||frag_buf_offs==NULL){
+    _ogg_free(frag_buf_offs);
+    _ogg_free(ref_frame_data);
+    /*Don't leave a dangling pointer behind for a later
+       oc_state_ref_bufs_clear() to free a second time.*/
+    _state->frag_buf_offs=NULL;
+    return TH_EFAULT;
+  }
+  /*Only publish the offset table once both allocations have succeeded.*/
+  _state->frag_buf_offs=frag_buf_offs;
+  /*Set up the width, height and stride for the image buffers.*/
+  _state->ref_frame_bufs[0][0].width=info->frame_width;
+  _state->ref_frame_bufs[0][0].height=info->frame_height;
+  _state->ref_frame_bufs[0][0].stride=yhstride;
+  _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
+   info->frame_width>>hdec;
+  _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
+   info->frame_height>>vdec;
+  _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
+   chstride;
+  for(rfi=1;rfi<_nrefs;rfi++){
+    memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
+     sizeof(_state->ref_frame_bufs[0]));
+  }
+  /*Set up the data pointers for the image buffers.*/
+  /*The frames are carved out of the single allocation one after another, so
+     ref_frame_data[0] is also the pointer that must eventually be freed.*/
+  for(rfi=0;rfi<_nrefs;rfi++){
+    _state->ref_frame_data[rfi]=ref_frame_data;
+    _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
+    ref_frame_data+=yplane_sz;
+    _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
+    ref_frame_data+=cplane_sz;
+    _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
+    ref_frame_data+=cplane_sz;
+    /*Flip the buffer upside down.
+      This allows us to decode Theora's bottom-up frames in their natural
+       order, yet return a top-down buffer with a positive stride to the user.*/
+    oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
+     _state->ref_frame_bufs[rfi]);
+  }
+  _state->ref_ystride[0]=-yhstride;
+  _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
+  /*Initialize the fragment buffer offsets.*/
+  /*Each entry is the offset of a fragment's first pixel from the start of
+     the first reference frame's data.*/
+  ref_frame_data=_state->ref_frame_data[0];
+  fragi=0;
+  for(pli=0;pli<3;pli++){
+    th_img_plane      *iplane;
+    oc_fragment_plane *fplane;
+    unsigned char     *vpix;
+    ptrdiff_t          stride;
+    ptrdiff_t          vfragi_end;
+    int                nhfrags;
+    iplane=_state->ref_frame_bufs[0]+pli;
+    fplane=_state->fplanes+pli;
+    vpix=iplane->data;
+    vfragi_end=fplane->froffset+fplane->nfrags;
+    nhfrags=fplane->nhfrags;
+    stride=iplane->stride;
+    while(fragi<vfragi_end){
+      ptrdiff_t      hfragi_end;
+      unsigned char *hpix;
+      hpix=vpix;
+      for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
+        frag_buf_offs[fragi]=hpix-ref_frame_data;
+        hpix+=8;
+      }
+      /*Advance by one row of fragments, i.e., 8 rows of pixels.*/
+      vpix+=stride<<3;
+    }
+  }
+  /*Initialize the reference frame indices.*/
+  _state->ref_frame_idx[OC_FRAME_GOLD]=
+   _state->ref_frame_idx[OC_FRAME_PREV]=
+   _state->ref_frame_idx[OC_FRAME_SELF]=-1;
+  /*A fourth buffer, if requested, is assigned to OC_FRAME_IO.*/
+  _state->ref_frame_idx[OC_FRAME_IO]=_nrefs>3?3:-1;
+  return 0;
+}
+
+/*Frees the buffers allocated by oc_state_ref_bufs_init().
+  All the reference frames share a single allocation, which starts at
+   ref_frame_data[0], so only that pointer is freed.
+  _state: The state whose reference buffers are to be released.*/
+static void oc_state_ref_bufs_clear(oc_theora_state *_state){
+  _ogg_free(_state->frag_buf_offs);
+  _ogg_free(_state->ref_frame_data[0]);
+}
+
+
+/*Points every entry of the state's function table at the portable C
+   implementation, and selects the OC_FZIG_ZAG table for dct_fzig_zag.
+  _state: The state whose opt_vtable and opt_data are to be filled in.*/
+void oc_state_vtable_init_c(oc_theora_state *_state){
+  _state->opt_vtable.frag_copy=oc_frag_copy_c;
+  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
+  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
+  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
+  _state->opt_vtable.idct8x8=oc_idct8x8_c;
+  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
+  _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c;
+  _state->opt_vtable.state_loop_filter_frag_rows=
+   oc_state_loop_filter_frag_rows_c;
+  _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
+  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
+}
+
+/*Initialize the accelerated function pointers.
+  When the library is built with OC_X86_ASM defined, the x86 initializer is
+   used; otherwise the portable C implementations are installed.
+  _state: The state whose function table is to be filled in.*/
+void oc_state_vtable_init(oc_theora_state *_state){
+#if defined(OC_X86_ASM)
+  oc_state_vtable_init_x86(_state);
+#else
+  oc_state_vtable_init_c(_state);
+#endif
+}
+
+
+/*Validates the stream parameters and initializes the shared codec state.
+  _state: The state to initialize; it is zeroed once the parameters have been
+           validated.
+  _info:  The stream parameters; a copy is stored in the state, with pic_y
+           converted to Theora's bottom-up coordinate system.
+  _nrefs: The number of reference buffers to allocate; must be 3 or 4.
+  Return: 0 on success, TH_EFAULT if _info is NULL or an allocation fails,
+   TH_EINVAL if a parameter is out of range, or TH_EIMPL if the required
+   buffer sizes overflow.
+  On failure, everything that was allocated has been freed again.*/
+int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
+  int ret;
+  /*First validate the parameters.*/
+  if(_info==NULL)return TH_EFAULT;
+  /*The width and height of the encoded frame must be multiples of 16.
+    They must also, when divided by 16, fit into a 16-bit unsigned integer.
+    The displayable frame offset coordinates must fit into an 8-bit unsigned
+     integer.
+    Note that the offset Y in the API is specified on the opposite side from
+     how it is specified in the bitstream, because the Y axis is flipped in
+     the bitstream.
+    The displayable frame must fit inside the encoded frame.
+    The color space must be one known by the encoder.*/
+  if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
+   _info->frame_width<=0||_info->frame_width>=0x100000||
+   _info->frame_height<=0||_info->frame_height>=0x100000||
+   /*The containment tests are written as a size check followed by a
+      subtraction, rather than as pic_x+pic_width>frame_width, so that a huge
+      offset or size cannot wrap the sum around and slip back into range.
+     NOTE(review): this assumes the picture fields are unsigned 32-bit
+      integers, as in the public th_info declaration; confirm against the
+      header.*/
+   _info->pic_width>_info->frame_width||
+   _info->pic_x>_info->frame_width-_info->pic_width||
+   _info->pic_height>_info->frame_height||
+   _info->pic_y>_info->frame_height-_info->pic_height||
+   _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255||
+   /*Note: the following <0 comparisons may generate spurious warnings on
+      platforms where enums are unsigned.
+     We could cast them to unsigned and just use the following >= comparison,
+      but there are a number of compilers which will mis-optimize this.
+     It's better to live with the spurious warnings.*/
+   _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
+   _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){
+    return TH_EINVAL;
+  }
+  memset(_state,0,sizeof(*_state));
+  memcpy(&_state->info,_info,sizeof(*_info));
+  /*Invert the sense of pic_y to match Theora's right-handed coordinate
+     system.*/
+  _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
+  _state->frame_type=OC_UNKWN_FRAME;
+  oc_state_vtable_init(_state);
+  ret=oc_state_frarray_init(_state);
+  if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
+  if(ret<0){
+    /*oc_state_ref_bufs_init() cleans up after itself, so only the fragment
+       arrays can still be allocated at this point.*/
+    oc_state_frarray_clear(_state);
+    return ret;
+  }
+  /*If the keyframe_granule_shift is out of range, use the maximum allowable
+     value.*/
+  if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){
+    _state->info.keyframe_granule_shift=31;
+  }
+  _state->keyframe_num=0;
+  _state->curframe_num=-1;
+  /*3.2.0 streams mark the frame index instead of the frame count.
+    This was changed with stream version 3.2.1 to conform to other Ogg
+     codecs.
+    We add an extra bias when computing granule positions for new streams.*/
+  _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1);
+  return 0;
+}
+
+/*Releases everything allocated by a successful oc_state_init(): first the
+   reference frame buffers, then the fragment, super block and macro block
+   arrays.
+  _state: The state to clear.*/
+void oc_state_clear(oc_theora_state *_state){
+  oc_state_ref_bufs_clear(_state);
+  oc_state_frarray_clear(_state);
+}
+
+
+/*Extends a range of rows of a reference plane into its left and right
+   padding, for use by unrestricted motion vectors.
+  The first and last pixel of each row are replicated outwards; the top and
+   bottom padding are not touched.
+  _refi: The index of the reference buffer to pad.
+  _pli:  The color plane.
+  _y0:   The Y coordinate of the first row to pad.
+  _yend: The Y coordinate of the row to stop padding at.*/
+void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
+ int _y0,int _yend){
+  th_img_plane  *iplane;
+  unsigned char *row;
+  unsigned char *row_end;
+  int            stride;
+  int            width;
+  int            hpadding;
+  iplane=_state->ref_frame_bufs[_refi]+_pli;
+  stride=iplane->stride;
+  width=iplane->width;
+  /*Horizontally decimated chroma planes only carry half the padding.*/
+  hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
+  row=iplane->data+_y0*(ptrdiff_t)stride;
+  row_end=iplane->data+_yend*(ptrdiff_t)stride;
+  /*The end test is != rather than <, so that a negative stride works too.*/
+  for(;row!=row_end;row+=stride){
+    unsigned char *last;
+    last=row+width-1;
+    memset(row-hpadding,row[0],hpadding);
+    memset(last+1,last[0],hpadding);
+  }
+}
+
+/*Extends a reference plane into its top and bottom padding, for use by
+   unrestricted motion vectors.
+  Whole rows, including their left and right padding, are replicated outwards
+   from the first and last row of the plane, so this must be called after the
+   left and right borders have been added.
+  _refi:      The index of the reference buffer to pad.
+  _pli:       The color plane.*/
+void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
+  th_img_plane  *iplane;
+  unsigned char *top;
+  unsigned char *bottom;
+  unsigned char *top_end;
+  int            stride;
+  int            hpadding;
+  int            vpadding;
+  int            row_sz;
+  iplane=_state->ref_frame_bufs[_refi]+_pli;
+  stride=iplane->stride;
+  /*Decimated chroma planes only carry half the padding in that direction.*/
+  hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
+  vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
+  /*The length of a full row, padding included.*/
+  row_sz=iplane->width+(hpadding<<1);
+  top=iplane->data-hpadding;
+  bottom=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
+  top_end=top-stride*(ptrdiff_t)vpadding;
+  /*Work outwards one row at a time in both directions, each time copying the
+     row most recently filled in.*/
+  for(;top!=top_end;top-=stride,bottom+=stride){
+    memcpy(top-stride,top,row_sz);
+    memcpy(bottom+stride,bottom,row_sz);
+  }
+}
+
+/*Pads all three planes of a reference image on every side by replicating
+   its border pixels, for use by unrestricted motion vectors.
+  _state: The context containing the reference buffers.
+  _refi:  The index of the reference buffer to pad.*/
+void oc_state_borders_fill(oc_theora_state *_state,int _refi){
+  int pli;
+  for(pli=0;pli<3;pli++){
+    int height;
+    height=_state->ref_frame_bufs[_refi][pli].height;
+    /*The left and right borders have to be in place before the caps, which
+       copy whole padded rows.*/
+    oc_state_borders_fill_rows(_state,_refi,pli,0,height);
+    oc_state_borders_fill_caps(_state,_refi,pli);
+  }
+}
+
+/*Determines the offsets in an image buffer to use for motion compensation.
+  The offsets are relative to the position of the block being predicted, in
+   units of pixels of the given plane, using the (negative) reference frame
+   stride stored in the state.
+  _state:   The Theora state the offsets are to be computed with.
+  _offsets: Returns the offset for the buffer(s).
+            _offsets[0] is always set.
+            _offsets[1] is set if the motion vector has non-zero fractional
+             components.
+  _pli:     The color plane index.
+  _dx:      The X component of the motion vector.
+  _dy:      The Y component of the motion vector.
+  Return: The number of offsets returned: 1 or 2.*/
+int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
+ int _pli,int _dx,int _dy){
+  /*Here is a brief description of how Theora handles motion vectors:
+    Motion vector components are specified to half-pixel accuracy in
+     undecimated directions of each plane, and quarter-pixel accuracy in
+     decimated directions.
+    Integer parts are extracted by dividing (not shifting) by the
+     appropriate amount, with truncation towards zero.
+    These integer values are used to calculate the first offset.
+
+    If either of the fractional parts are non-zero, then a second offset is
+     computed.
+    No third or fourth offsets are computed, even if both components have
+     non-zero fractional parts.
+    The second offset is computed by dividing (not shifting) by the
+     appropriate amount, always truncating _away_ from zero.*/
+#if 0
+  /*This version of the code doesn't use any tables, but is slower.*/
+  /*It is compiled out, and kept only for reference.*/
+  int ystride;
+  int xprec;
+  int yprec;
+  int xfrac;
+  int yfrac;
+  int offs;
+  ystride=_state->ref_ystride[_pli];
+  /*These two variables decide whether we are in half- or quarter-pixel
+     precision in each component.*/
+  xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
+  yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
+  /*These two variables are either 0 if all the fractional bits are zero or -1
+     if any of them are non-zero.*/
+  xfrac=OC_SIGNMASK(-(_dx&(xprec|1)));
+  yfrac=OC_SIGNMASK(-(_dy&(yprec|1)));
+  offs=(_dx>>xprec)+(_dy>>yprec)*ystride;
+  if(xfrac||yfrac){
+    int xmask;
+    int ymask;
+    xmask=OC_SIGNMASK(_dx);
+    ymask=OC_SIGNMASK(_dy);
+    yfrac&=ystride;
+    _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
+    _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
+    return 2;
+  }
+  else{
+    _offsets[0]=offs;
+    return 1;
+  }
+#else
+  /*Using tables simplifies the code, and there's enough arithmetic to hide the
+     latencies of the memory references.*/
+  /*Both tables are indexed by a motion vector component plus 31, and list
+     values for components from -31 to 31.
+    The first row of each is for half-pixel precision, the second for
+     quarter-pixel precision.
+    OC_MVMAP gives the integer part of the component, truncated towards
+     zero.*/
+  static const signed char OC_MVMAP[2][64]={
+    {
+          -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
+       -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1,  0,
+        0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,
+        8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
+    },
+    {
+           -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
+       -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1,  0,  0,  0,
+        0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+        4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7
+    }
+  };
+  /*OC_MVMAP2 gives the step from the first offset to the second: 0 if the
+     component has no fractional part, otherwise the sign of the component.*/
+  static const signed char OC_MVMAP2[2][64]={
+    {
+        -1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
+      0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
+      0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,
+      0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1
+    },
+    {
+        -1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
+      0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
+      0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
+      0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1
+    }
+  };
+  int ystride;
+  int qpx;
+  int qpy;
+  int mx;
+  int my;
+  int mx2;
+  int my2;
+  int offs;
+  ystride=_state->ref_ystride[_pli];
+  /*qpy and qpx select the quarter-pixel row of the tables for chroma planes
+     that are decimated in the corresponding direction.
+    NOTE(review): _dx and _dy are not range checked before being used as
+     table indices; callers are presumably required to keep them within
+     -31...31.*/
+  qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
+  my=OC_MVMAP[qpy][_dy+31];
+  my2=OC_MVMAP2[qpy][_dy+31];
+  qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
+  mx=OC_MVMAP[qpx][_dx+31];
+  mx2=OC_MVMAP2[qpx][_dx+31];
+  offs=my*ystride+mx;
+  /*A second offset is only needed if either component has a fractional
+     part.*/
+  if(mx2||my2){
+    _offsets[1]=offs+my2*ystride+mx2;
+    _offsets[0]=offs;
+    return 2;
+  }
+  _offsets[0]=offs;
+  return 1;
+#endif
+}
+
+void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
+  /*Forward all arguments to the implementation stored in the vtable.*/
+  (*_state->opt_vtable.state_frag_recon)(_state,
+   _fragi,_pli,_dct_coeffs,_last_zzi,_dc_quant);}
+
+void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
+  unsigned char *out;
+  ptrdiff_t      buf_off;
+  int            stride;
+  int            mode;
+  /*Step 1: turn the coefficients into a residual.*/
+  if(_last_zzi>=2){
+    /*General case: dequantize DC here, then run the inverse transform.*/
+    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
+    oc_idct8x8(_state,_dct_coeffs,_last_zzi);
+  }
+  else{
+    ogg_int16_t dc;
+    int         i;
+    /*DC only: every output gets the same value, so no transform is run.
+      Only this dequant product is rounded, since no iDCT rounding follows.*/
+    dc=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
+    /*LOOP VECTORIZES.*/
+    for(i=0;i<64;i++)_dct_coeffs[i]=dc;
+  }
+  /*Step 2: write the reconstruction into the OC_FRAME_SELF buffer.*/
+  mode=_state->frags[_fragi].mb_mode;
+  stride=_state->ref_ystride[_pli];
+  buf_off=_state->frag_buf_offs[_fragi];
+  out=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+buf_off;
+  if(mode!=OC_MODE_INTRA){
+    const unsigned char *pred;
+    int                  mvoffs[2];
+    int                  nmvoffs;
+    pred=_state->ref_frame_data[
+     _state->ref_frame_idx[OC_FRAME_FOR_MODE(mode)]]+buf_off;
+    nmvoffs=oc_state_get_mv_offsets(_state,mvoffs,_pli,
+     _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1]);
+    if(nmvoffs>1){
+      oc_frag_recon_inter2(_state,
+       out,pred+mvoffs[0],pred+mvoffs[1],stride,_dct_coeffs);
+    }
+    else oc_frag_recon_inter(_state,out,pred+mvoffs[0],stride,_dct_coeffs);
+  }
+  else oc_frag_recon_intra(_state,out,stride,_dct_coeffs);
+}
+
+/*Copies a list of fragments from one reference frame to another by calling
+   the implementation stored in the state's vtable.
+  _fragis:    The fragment indices to copy.
+  _nfragis:   How many entries _fragis holds.
+  _dst_frame: The reference frame that receives the fragments.
+  _src_frame: The reference frame the fragments are read from.
+  _pli:       The color plane containing the fragments.*/
+void oc_state_frag_copy_list(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli){
+  (*_state->opt_vtable.state_frag_copy_list)(_state,
+   _fragis,_nfragis,_dst_frame,_src_frame,_pli);
+}
+
+void oc_state_frag_copy_list_c(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli){
+  const ptrdiff_t     *offs;
+  const unsigned char *src;
+  unsigned char       *dst;
+  ptrdiff_t            i;
+  int                  stride;
+  /*The same per-fragment byte offset is applied to both frames.*/
+  offs=_state->frag_buf_offs;
+  stride=_state->ref_ystride[_pli];
+  src=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]];
+  dst=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]];
+  for(i=0;i<_nfragis;i++){
+    ptrdiff_t off;
+    off=offs[_fragis[i]];
+    oc_frag_copy(_state,dst+off,src+off,stride);
+  }
+}
+
+static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){
+  unsigned char *row;
+  int            rowi;
+  /*Each of the 8 rows uses the two pixels on either side of the edge.*/
+  for(row=_pix-2,rowi=8;rowi-->0;row+=_ystride){
+    int f;
+    f=row[0]-row[3]+3*(row[2]-row[1]);
+    /*Indexing _bv with the rounded value computes the function
+      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
+      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
+    f=_bv[f+4>>3];
+    row[1]=OC_CLAMP255(row[1]+f);
+    row[2]=OC_CLAMP255(row[2]-f);
+  }
+}
+
+static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){
+  unsigned char *above;
+  int            col;
+  above=_pix-_ystride;
+  for(col=0;col<8;col++){
+    int f;
+    f=above[col-_ystride]-_pix[col+_ystride]+3*(_pix[col]-above[col]);
+    /*The _bv table evaluates, for _2flimit=2*loop_filter_limits[qis[0]],
+       f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)).*/
+    f=_bv[f+4>>3];
+    above[col]=OC_CLAMP255(above[col]+f);
+    _pix[col]=OC_CLAMP255(_pix[col]-f);
+  }
+}
+
+/*Builds the bounding values table that the loop filter indexes.
+  _bv: The 256 entries to fill in; left untouched when 1 is returned.
+  Return: 0 on success, or 1 if the filter limit is zero (nothing to filter).*/
+int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){
+  int limit;
+  int k;
+  limit=_state->loop_filter_limits[_state->qis[0]];
+  if(!limit)return 1;
+  memset(_bv,0,256*sizeof(*_bv));
+  for(k=0;k<limit;k++){
+    _bv[127+k]=k;
+    _bv[127-k]=-k;
+    if(k+limit<=127)_bv[127-limit-k]=k-limit;
+    if(k+limit<=128)_bv[127+limit+k]=limit-k;
+  }
+  return 0;
+}
+
+/*Loop filters a range of fragment rows in one plane via the state's vtable.
+  Filtering the bottom edge of a row changes pixels in the fragment row below
+   it, so that row must be available too.
+  _bv:        The bounding values array.
+  _refi:      The index of the frame buffer to filter.
+  _pli:       The color plane to filter.
+  _fragy0:    The Y coordinate of the first fragment row to filter.
+  _fragy_end: The Y coordinate of the fragment row at which filtering stops.*/
+void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256],
+ int _refi,int _pli,int _fragy0,int _fragy_end){
+  (*_state->opt_vtable.state_loop_filter_frag_rows)(_state,
+   _bv,_refi,_pli,_fragy0,_fragy_end);
+}
+
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv,
+ int _refi,int _pli,int _fragy0,int _fragy_end){ /*Rows [_fragy0,_fragy_end).*/
+  const oc_fragment_plane *fplane;
+  const oc_fragment       *frags;
+  const ptrdiff_t         *frag_buf_offs;
+  unsigned char           *ref_frame_data;
+  ptrdiff_t                fragi_top; /*First fragment index of the plane.*/
+  ptrdiff_t                fragi_bot; /*One past the plane's last fragment.*/
+  ptrdiff_t                fragi0; /*First fragment of the current row.*/
+  ptrdiff_t                fragi0_end; /*First fragment past the last row.*/
+  int                      ystride;
+  int                      nhfrags;
+  _bv+=127; /*Re-center: the filters index the table with signed values.*/
+  fplane=_state->fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  fragi_top=fplane->froffset;
+  fragi_bot=fragi_top+fplane->nfrags;
+  fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
+  fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
+  ystride=_state->ref_ystride[_pli];
+  frags=_state->frags;
+  frag_buf_offs=_state->frag_buf_offs;
+  ref_frame_data=_state->ref_frame_data[_refi];
+  /*The following loops are constructed somewhat non-intuitively on purpose.
+    The main idea is: if a block boundary has at least one coded fragment on
+     it, the filter is applied to it.
+    However, the order that the filters are applied in matters, and VP3 chose
+     the somewhat strange ordering used below.*/
+  while(fragi0<fragi0_end){
+    ptrdiff_t fragi;
+    ptrdiff_t fragi_end;
+    fragi=fragi0;
+    fragi_end=fragi+nhfrags;
+    while(fragi<fragi_end){
+      if(frags[fragi].coded){
+        unsigned char *ref;
+        ref=ref_frame_data+frag_buf_offs[fragi];
+        if(fragi>fragi0)loop_filter_h(ref,ystride,_bv); /*Left edge.*/
+        if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv); /*Top edge.*/
+        if(fragi+1<fragi_end&&!frags[fragi+1].coded){ /*Right one is uncoded.*/
+          loop_filter_h(ref+8,ystride,_bv); /*Right edge.*/
+        }
+        if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){ /*Below too.*/
+          loop_filter_v(ref+(ystride<<3),ystride,_bv); /*Bottom edge.*/
+        }
+      }
+      fragi++;
+    }
+    fragi0+=nhfrags;
+  }
+}
+
+#if defined(OC_DUMP_IMAGES)
+int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
+ const char *_suf){
+  /*Dump a 16-bit RGB PNG of reference frame _frame to "<frame#><_suf>.png".*/
+  png_structp    png;
+  png_infop      info;
+  png_bytep     *image;
+  FILE          *fp;
+  char           fname[32]; /*11 (int)+16 (suffix)+4 (".png")+1 (NUL).*/
+  unsigned char *y_row;
+  unsigned char *u_row;
+  unsigned char *v_row;
+  unsigned char *y;
+  unsigned char *u;
+  unsigned char *v;
+  ogg_int64_t    iframe;
+  ogg_int64_t    pframe;
+  int            y_stride;
+  int            u_stride;
+  int            v_stride;
+  int            framei;
+  int            width;
+  int            height;
+  int            imgi;
+  int            imgj;
+  width=_state->info.frame_width;
+  height=_state->info.frame_height;
+  iframe=_state->granpos>>_state->info.keyframe_granule_shift;
+  pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
+  sprintf(fname,"%08i%.16s.png",(int)(iframe+pframe),_suf); /*Bounded suffix.*/
+  fp=fopen(fname,"wb");
+  if(fp==NULL)return TH_EFAULT;
+  image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
+  if(image==NULL){
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
+  if(png==NULL){
+    oc_free_2d(image);
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  info=png_create_info_struct(png);
+  if(info==NULL){
+    png_destroy_write_struct(&png,NULL);
+    oc_free_2d(image);
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  if(setjmp(png_jmpbuf(png))){ /*libpng errors land here: clean up and fail.*/
+    png_destroy_write_struct(&png,&info);
+    oc_free_2d(image);
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  framei=_state->ref_frame_idx[_frame];
+  y_row=_state->ref_frame_bufs[framei][0].data;
+  u_row=_state->ref_frame_bufs[framei][1].data;
+  v_row=_state->ref_frame_bufs[framei][2].data;
+  y_stride=_state->ref_frame_bufs[framei][0].stride;
+  u_stride=_state->ref_frame_bufs[framei][1].stride;
+  v_stride=_state->ref_frame_bufs[framei][2].stride;
+  /*Chroma up-sampling is just done with a box filter.
+    This is very likely what will actually be used in practice on a real
+     display, and also removes one more layer to search in for the source of
+     artifacts.
+    As an added bonus, it's dead simple.*/
+  for(imgi=height;imgi-->0;){ /*Image rows are filled from the last to the first.*/
+    int dc;
+    y=y_row;
+    u=u_row;
+    v=v_row;
+    for(imgj=0;imgj<6*width;){ /*6 bytes per pixel: 3 big-endian 16-bit samples.*/
+      float    yval;
+      float    uval;
+      float    vval;
+      unsigned rval;
+      unsigned gval;
+      unsigned bval;
+      /*This is intentionally slow and very accurate.*/
+      yval=(*y-16)*(1.0F/219);
+      uval=(*u-128)*(2*(1-0.114F)/224);
+      vval=(*v-128)*(2*(1-0.299F)/224);
+      rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
+      gval=OC_CLAMPI(0,(int)(65535*(
+       yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
+      bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
+      image[imgi][imgj++]=(unsigned char)(rval>>8);
+      image[imgi][imgj++]=(unsigned char)(rval&0xFF);
+      image[imgi][imgj++]=(unsigned char)(gval>>8);
+      image[imgi][imgj++]=(unsigned char)(gval&0xFF);
+      image[imgi][imgj++]=(unsigned char)(bval>>8);
+      image[imgi][imgj++]=(unsigned char)(bval&0xFF);
+      dc=(y-y_row&1)|(_state->info.pixel_fmt&1); /*1 when chroma steps too.*/
+      y++;
+      u+=dc;
+      v+=dc;
+    }
+    dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1); /*All-ones mask or 0.*/
+    y_row+=y_stride;
+    u_row+=dc&u_stride;
+    v_row+=dc&v_stride;
+  }
+  png_init_io(png,fp);
+  png_set_compression_level(png,Z_BEST_COMPRESSION);
+  png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
+   PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
+  switch(_state->info.colorspace){
+    case TH_CS_ITU_REC_470M:{
+      png_set_gAMA(png,info,2.2);
+      png_set_cHRM_fixed(png,info,31006,31616,
+       67000,32000,21000,71000,14000,8000);
+    }break;
+    case TH_CS_ITU_REC_470BG:{
+      png_set_gAMA(png,info,2.67);
+      png_set_cHRM_fixed(png,info,31271,32902,
+       64000,33000,29000,60000,15000,6000);
+    }break;
+    default:break;
+  }
+  png_set_pHYs(png,info,_state->info.aspect_numerator,
+   _state->info.aspect_denominator,0);
+  png_set_rows(png,info,image);
+  png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
+  png_write_end(png,info);
+  png_destroy_write_struct(&png,&info);
+  oc_free_2d(image);
+  fclose(fp);
+  return 0; /*Success; every failure path above returns TH_EFAULT.*/
+}
+#endif
+
+
+
+ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){
+  oc_theora_state *state;
+  ogg_int64_t      key;
+  ogg_int64_t      delta;
+  int              shift;
+  /*A negative granule position has no frame number.*/
+  if(_granpos<0)return -1;
+  state=(oc_theora_state *)_encdec;
+  shift=state->info.keyframe_granule_shift;
+  /*Split the granule position at keyframe_granule_shift and add the halves.*/
+  key=_granpos>>shift;
+  delta=_granpos-(key<<shift);
+  /*3.2.0 streams store a frame index here; 3.2.1 and later store a count.
+    The index is returned, so the value is adjusted for 3.2.1 or later.*/
+  return key+delta-TH_VERSION_CHECK(&state->info,3,2,1);
+}
+
+double th_granule_time(void *_encdec,ogg_int64_t _granpos){
+  const oc_theora_state *state;
+  /*A negative granule position has no time.*/
+  if(_granpos<0)return -1;
+  state=(const oc_theora_state *)_encdec;
+  /*(frame index+1) scaled by fps_denominator/fps_numerator.*/
+  return (th_granule_frame(_encdec,_granpos)+1)
+   *((double)state->info.fps_denominator/state->info.fps_numerator);
+}

Copied: trunk/theora/lib/tokenize.c (from rev 16442, trunk/theora/lib/enc/tokenize.c)
===================================================================
--- trunk/theora/lib/tokenize.c	                        (rev 0)
+++ trunk/theora/lib/tokenize.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,1072 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+
+static int oc_make_eob_token(int _run_count){
+  int cat;
+  /*Counts below 4 map onto consecutive tokens from OC_DCT_EOB1_TOKEN (for 1).*/
+  if(_run_count<=3)return OC_DCT_EOB1_TOKEN+(_run_count-1);
+  /*Larger counts pick a category from OC_ILOGNZ_32 of the count, capped at 3.*/
+  cat=OC_ILOGNZ_32(_run_count)-3;
+  if(cat>3)cat=3;
+  return OC_DCT_REPEAT_RUN0_TOKEN+cat;
+}
+
+static int oc_make_eob_token_full(int _run_count,int *_eb){
+  int cat;
+  /*Counts below 4: one token per count, and the extra bits are zero.*/
+  if(_run_count<=3){
+    *_eb=0;
+    return OC_DCT_EOB1_TOKEN+(_run_count-1);
+  }
+  cat=OC_ILOGNZ_32(_run_count)-3;
+  if(cat>3)cat=3;
+  /*Presumably the table yields a base of 4, 8, 16 or 0 for cat 0...3.*/
+  *_eb=_run_count-OC_BYTE_TABLE32(4,8,16,0,cat);
+  return OC_DCT_REPEAT_RUN0_TOKEN+cat;
+}
+
+/*Returns the number of blocks ended by an EOB token with extra bits _eb.*/
+static int oc_decode_eob_token(int _token,int _eb){
+  return (int)((0x20820C41U>>5*_token&0x1FU)+(unsigned)_eb); /*5-bit LUT.*/
+}
+
+/*TODO: Only DCT tokenization uses this now, and never for runs; it should be
+   simplified.*/
+static int oc_make_dct_token_full(int _zzi,int _zzj,int _val,int *_eb){
+  int sign;
+  int mag;
+  int nzeros;
+  int tok;
+  /*Split the value into a sign flag and a magnitude.*/
+  sign=_val<0;
+  mag=abs(_val);
+  nzeros=_zzj-_zzi;
+  if(nzeros>0){
+    int slack;
+    /*Runs starting in stack 1 get one zero less than the others, so that DC
+       fixups can always extend such a dctrun token without overflowing it.*/
+    slack=_zzi!=1;
+    if(mag<=1&&nzeros<=16+slack){
+      if(nzeros<=5){
+        tok=OC_DCT_RUN_CAT1A+nzeros-1;
+        *_eb=sign;
+      }
+      else if(nzeros<=9){
+        tok=OC_DCT_RUN_CAT1B;
+        *_eb=nzeros-6+(sign<<2);
+      }
+      else{
+        tok=OC_DCT_RUN_CAT1C;
+        *_eb=nzeros-10+(sign<<3);
+      }
+    }
+    else if(mag<=3&&nzeros<=2+slack){
+      if(nzeros<=1){
+        tok=OC_DCT_RUN_CAT2A;
+        *_eb=mag-2+(sign<<1);
+      }
+      else{
+        tok=OC_DCT_RUN_CAT2B;
+        *_eb=nzeros-2+(mag-2<<1)+(sign<<2);
+      }
+    }
+    else{
+      /*A bare zero run: the value is not part of this token.*/
+      tok=nzeros<=8?OC_DCT_SHORT_ZRL_TOKEN:OC_DCT_ZRL_TOKEN;
+      *_eb=nzeros-1;
+    }
+  }
+  else if(mag<=2){
+    tok=OC_ONE_TOKEN+(mag-1<<1)+sign;
+    *_eb=0;
+  }
+  else if(mag<=6){
+    tok=OC_DCT_VAL_CAT2+mag-3;
+    *_eb=sign;
+  }
+  else if(mag<=8){
+    tok=OC_DCT_VAL_CAT3;
+    *_eb=mag-7+(sign<<1);
+  }
+  else if(mag<=12){
+    tok=OC_DCT_VAL_CAT4;
+    *_eb=mag-9+(sign<<2);
+  }
+  else if(mag<=20){
+    tok=OC_DCT_VAL_CAT5;
+    *_eb=mag-13+(sign<<3);
+  }
+  else if(mag<=36){
+    tok=OC_DCT_VAL_CAT6;
+    *_eb=mag-21+(sign<<4);
+  }
+  else if(mag<=68){
+    tok=OC_DCT_VAL_CAT7;
+    *_eb=mag-37+(sign<<5);
+  }
+  else{
+    tok=OC_DCT_VAL_CAT8;
+    *_eb=mag-69+(sign<<9);
+  }
+  return tok;
+}
+
+/*Token logging to allow a few fragments of efficient rollback.
+  Late SKIP analysis is tied up in the tokenization process, so we need to be
+   able to undo a fragment's tokens on a whim.*/
+
+/*Huffman table offset for each zig-zag index; used by oc_token_bits().*/
+static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
+   0,16,16,16,16,16,                       /*zzi 0, then 1...5.*/
+  32,32,32,32,32,32,32,32,32,              /*zzi 6...14.*/
+  48,48,48,48,48,48,48,48,48,48,48,48,48,  /*zzi 15...27.*/
+  64,64,64,64,64,64,64,64,64,64,64,64,     /*zzi 28...63 (36 entries).*/
+  64,64,64,64,64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,64,64,64,64
+};
+
+static int oc_token_bits(oc_enc_ctx *_enc,int _huffi,int _zzi,int _token){
+  return OC_DCT_TOKEN_EXTRA_BITS[_token] /*Extra bits, plus the code length.*/
+   +_enc->huff_codes[OC_ZZI_HUFF_OFFSET[_zzi]+_huffi][_token].nbits;
+}
+
+static void oc_enc_tokenlog_checkpoint(oc_enc_ctx *_enc,
+ oc_token_checkpoint *_cp,int _pli,int _zzi){
+  _cp->ndct_tokens=_enc->ndct_tokens[_pli][_zzi]; /*Current token count.*/
+  _cp->eob_run=_enc->eob_run[_pli][_zzi]; /*Current EOB run.*/
+  _cp->zzi=_zzi; /*With pli, records which token list was captured.*/
+  _cp->pli=_pli;
+}
+
+void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
+ const oc_token_checkpoint *_stack,int _n){
+  int ci;
+  /*Restore from the last checkpoint back to the first.*/
+  ci=_n;
+  while(ci>0){
+    const oc_token_checkpoint *cp;
+    cp=_stack+--ci;
+    _enc->eob_run[cp->pli][cp->zzi]=cp->eob_run;
+    _enc->ndct_tokens[cp->pli][cp->zzi]=cp->ndct_tokens;
+  }
+}
+
+static void oc_enc_token_log(oc_enc_ctx *_enc,
+ int _pli,int _zzi,int _token,int _eb){
+  ptrdiff_t slot;
+  slot=_enc->ndct_tokens[_pli][_zzi]++; /*Claim the next free entry.*/
+  _enc->extra_bits[_pli][_zzi][slot]=(ogg_uint16_t)_eb;
+  _enc->dct_tokens[_pli][_zzi][slot]=(unsigned char)_token;
+}
+
+static void oc_enc_eob_log(oc_enc_ctx *_enc,
+ int _pli,int _zzi,int _run_count){
+  int extra;
+  int tok;
+  tok=oc_make_eob_token_full(_run_count,&extra); /*Token and its extra bits.*/
+  oc_enc_token_log(_enc,_pli,_zzi,tok,extra);
+}
+
+
+void oc_enc_tokenize_start(oc_enc_ctx *_enc){ /*Zero four encoder arrays.*/
+  memset(_enc->dc_pred_last,0,sizeof _enc->dc_pred_last);
+  memset(_enc->dct_token_offs,0,sizeof _enc->dct_token_offs);
+  memset(_enc->eob_run,0,sizeof _enc->eob_run);
+  memset(_enc->ndct_tokens,0,sizeof _enc->ndct_tokens);
+}
+
+typedef struct oc_quant_token oc_quant_token;
+
+/*A single node in the Viterbi trellis.
+  We maintain up to 2 of these per coefficient:
+    - A token to code if the value is zero (EOB, zero run, or combo token).
+    - A token to code if the value is not zero (DCT value token).*/
+struct oc_quant_token{
+  unsigned char next; /*Following node: (zig-zag index<<1)+node choice (0/1).*/
+  signed char   token; /*The token to emit at this node.*/
+  ogg_int16_t   eb; /*The extra bits that go with token.*/
+  ogg_uint32_t  cost; /*Distortion+lambda*bits, summed with the next node's.*/
+  int           bits; /*Bits for this token, summed with the next node's.*/
+  int           qc; /*The signed quantized value this node codes, or 0.*/
+};
+
+/*Tokenizes the AC coefficients, possibly adjusting the quantization, and then
+   dequantizes and de-zig-zags the result.
+  The DC coefficient is not preserved; it should be restored by the caller.*/
+int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _acmin){
+  oc_token_checkpoint *stack;
+  ogg_int64_t          zflags;
+  ogg_int64_t          nzflags;
+  ogg_int64_t          best_flags;
+  ogg_uint32_t         d2_accum[64];
+  oc_quant_token       tokens[64][2];
+  ogg_uint16_t        *eob_run;
+  const unsigned char *dct_fzig_zag;
+  ogg_uint32_t         cost;
+  int                  bits;
+  int                  eob;
+  int                  token;
+  int                  eb;
+  int                  next;
+  int                  huffi;
+  int                  zzi;
+  int                  ti;
+  int                  zzj;
+  int                  qc;
+  huffi=_enc->huff_idxs[_enc->state.frame_type][1][_pli+1>>1];
+  eob_run=_enc->eob_run[_pli];
+  memset(tokens[0],0,sizeof(tokens[0]));
+  best_flags=nzflags=0;
+  zflags=1;
+  d2_accum[0]=0;
+  zzj=64;
+  for(zzi=OC_MINI(_zzi,63);zzi>0;zzi--){
+    ogg_int32_t  lambda;
+    ogg_uint32_t best_cost;
+    int          best_bits=best_bits;
+    int          best_next=best_next;
+    int          best_token=best_token;
+    int          best_eb=best_eb;
+    int          best_qc=best_qc;
+    int          flush_bits;
+    ogg_uint32_t d2;
+    int          dq;
+    int          e;
+    int          c;
+    int          s;
+    int          tj;
+    lambda=_enc->lambda;
+    qc=_qdct[zzi];
+    s=-(qc<0);
+    qc=qc+s^s;
+    c=_dct[OC_FZIG_ZAG[zzi]];
+    if(qc<=1){
+      ogg_uint32_t sum_d2;
+      int          nzeros;
+      int          dc_reserve;
+      /*The hard case: try a zero run.*/
+      if(!qc){
+        /*Skip runs that are already quantized to zeros.
+          If we considered each zero coefficient in turn, we might
+           theoretically find a better way to partition long zero runs (e.g.,
+           a run of > 17 zeros followed by a 1 might be better coded as a short
+           zero run followed by a combo token, rather than the longer zero
+           token followed by a 1 value token), but zeros are so common that
+           this becomes very computationally expensive (quadratic instead of
+           linear in the number of coefficients), for a marginal gain.*/
+        while(zzi>1&&!_qdct[zzi-1])zzi--;
+        /*The distortion of coefficients originally quantized to zero is
+           treated as zero (since we'll never quantize them to anything else).*/
+        d2=0;
+      }
+      else{
+        c=c+s^s;
+        d2=c*(ogg_int32_t)c;
+      }
+      eob=eob_run[zzi];
+      nzeros=zzj-zzi;
+      zzj&=63;
+      sum_d2=d2+d2_accum[zzj];
+      d2_accum[zzi]=sum_d2;
+      flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0;
+      /*We reserve 1 spot for combo run tokens that start in the 1st AC stack
+         to ensure they can be extended to include the DC coefficient if
+         necessary; this greatly simplifies stack-rewriting later on.*/
+      dc_reserve=zzi+62>>6;
+      best_cost=0xFFFFFFFF;
+      for(;;){
+        if(nzflags>>zzj&1){
+          int cat;
+          int val;
+          int val_s;
+          int zzk;
+          int tk;
+          next=tokens[zzj][1].next;
+          tk=next&1;
+          zzk=next>>1;
+          /*Try a pure zero run to this point.*/
+          cat=nzeros+55>>6;
+          token=OC_DCT_SHORT_ZRL_TOKEN+cat;
+          bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+          d2=sum_d2-d2_accum[zzj];
+          cost=d2+lambda*bits+tokens[zzj][1].cost;
+          if(cost<=best_cost){
+            best_next=(zzj<<1)+1;
+            best_token=token;
+            best_eb=nzeros-1;
+            best_cost=cost;
+            best_bits=bits+tokens[zzj][1].bits;
+            best_qc=0;
+          }
+          if(nzeros<16+dc_reserve){
+            val=_qdct[zzj];
+            val_s=-(val<0);
+            val=val+val_s^val_s;
+            if(val<=2){
+              /*Try a +/- 1 combo token.*/
+              if(nzeros<6){
+                token=OC_DCT_RUN_CAT1A+nzeros-1;
+                eb=-val_s;
+              }
+              else{
+                cat=nzeros+54>>6;
+                token=OC_DCT_RUN_CAT1B+cat;
+                eb=(-val_s<<cat+2)+nzeros-6-(cat<<2);
+              }
+              e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj];
+              d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj];
+              bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+              cost=d2+lambda*bits+tokens[zzk][tk].cost;
+              if(cost<=best_cost){
+                best_next=next;
+                best_token=token;
+                best_eb=eb;
+                best_cost=cost;
+                best_bits=bits+tokens[zzk][tk].bits;
+                best_qc=1+val_s^val_s;
+              }
+            }
+            if(nzeros<2+dc_reserve&&2<=val&&val<=4){
+              /*Try a +/- 2/3 combo token.*/
+              cat=nzeros>>1;
+              token=OC_DCT_RUN_CAT2A+cat;
+              bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+              val=2+((val+val_s^val_s)>2);
+              e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj]*val;
+              d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj];
+              cost=d2+lambda*bits+tokens[zzk][tk].cost;
+              if(cost<=best_cost){
+                best_cost=cost;
+                best_bits=bits+tokens[zzk][tk].bits;
+                best_next=next;
+                best_token=token;
+                best_eb=(-val_s<<1+cat)+(val-2<<cat)+(nzeros-1>>1);
+                best_qc=val+val_s^val_s;
+              }
+            }
+          }
+          /*zzj can't be coded as a zero, so stop trying to extend the run.*/
+          if(!(zflags>>zzj&1))break;
+        }
+        /*We could try to consider _all_ potentially non-zero coefficients, but
+           if we already found a bunch of them not worth coding, it's fairly
+           unlikely they would now be worth coding from this position; skipping
+           them saves a lot of work.*/
+        zzj=(tokens[zzj][0].next>>1)-(tokens[zzj][0].qc!=0)&63;
+        if(zzj==0){
+          /*We made it all the way to the end of the block; try an EOB token.*/
+          if(eob<4095){
+            bits=oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob+1))
+             -flush_bits;
+          }
+          else bits=oc_token_bits(_enc,huffi,zzi,OC_DCT_EOB1_TOKEN);
+          cost=sum_d2+bits*lambda;
+          /*If the best route so far is still a pure zero run to the end of the
+             block, force coding it as an EOB.
+            Even if it's not optimal for this block, it has a good chance of
+             getting combined with an EOB token from subsequent blocks, saving
+             bits overall.*/
+          if(cost<=best_cost||best_token<=OC_DCT_ZRL_TOKEN&&zzi+best_eb==63){
+            best_next=0;
+            /*This token is just a marker; in reality we may not emit any
+               tokens, but update eob_run[] instead.*/
+            best_token=OC_DCT_EOB1_TOKEN;
+            best_eb=0;
+            best_cost=cost;
+            best_bits=bits;
+            best_qc=0;
+          }
+          break;
+        }
+        nzeros=zzj-zzi;
+      }
+      tokens[zzi][0].next=(unsigned char)best_next;
+      tokens[zzi][0].token=(signed char)best_token;
+      tokens[zzi][0].eb=(ogg_int16_t)best_eb;
+      tokens[zzi][0].cost=best_cost;
+      tokens[zzi][0].bits=best_bits;
+      tokens[zzi][0].qc=best_qc;
+      zflags|=(ogg_int64_t)1<<zzi;
+      if(qc){
+        dq=_dequant[zzi];
+        if(zzi<_acmin)lambda=0;
+        e=dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=OC_ONE_TOKEN-s;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        zzj=zzi+1&63;
+        tj=best_flags>>zzj&1;
+        next=(zzj<<1)+tj;
+        tokens[zzi][1].next=(unsigned char)next;
+        tokens[zzi][1].token=(signed char)token;
+        tokens[zzi][1].eb=0;
+        tokens[zzi][1].cost=d2+lambda*bits+tokens[zzj][tj].cost;
+        tokens[zzi][1].bits=bits+tokens[zzj][tj].bits;
+        tokens[zzi][1].qc=1+s^s;
+        nzflags|=(ogg_int64_t)1<<zzi;
+        best_flags|=
+         (ogg_int64_t)(tokens[zzi][1].cost<tokens[zzi][0].cost)<<zzi;
+      }
+    }
+    else{
+      eob=eob_run[zzi];
+      if(zzi<_acmin)lambda=0;
+      c=c+s^s;
+      dq=_dequant[zzi];
+      /*No zero run can extend past this point.*/
+      d2_accum[zzi]=0;
+      flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0;
+      if(qc<=2){
+        e=2*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_TWO_TOKEN-s;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e-=dq;
+        d2=e*(ogg_int32_t)e;
+        token=OC_ONE_TOKEN-s;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_bits=bits;
+          best_cost=cost;
+          qc--;
+        }
+        best_eb=0;
+      }
+      else if(qc<=3){
+        e=3*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT2;
+        best_eb=-s;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e-=dq;
+        d2=e*(ogg_int32_t)e;
+        token=OC_TWO_TOKEN-s;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_eb=0;
+          best_bits=bits;
+          best_cost=cost;
+          qc--;
+        }
+      }
+      else if(qc<=6){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT2+qc-3;
+        best_eb=-s;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e-=dq;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_bits=bits;
+          best_cost=cost;
+          qc--;
+        }
+      }
+      else if(qc<=8){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT3;
+        best_eb=(-s<<1)+qc-7;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=6*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=OC_DCT_VAL_CAT2+3;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_eb=-s;
+          best_bits=bits;
+          best_cost=cost;
+          qc=6;
+        }
+      }
+      else if(qc<=12){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT4;
+        best_eb=(-s<<2)+qc-9;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=8*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_eb=(-s<<1)+1;
+          best_bits=bits;
+          best_cost=cost;
+          qc=8;
+        }
+      }
+      else if(qc<=20){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT5;
+        best_eb=(-s<<3)+qc-13;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=12*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_eb=(-s<<2)+3;
+          best_bits=bits;
+          best_cost=cost;
+          qc=12;
+        }
+      }
+      else if(qc<=36){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT6;
+        best_eb=(-s<<4)+qc-21;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=20*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_eb=(-s<<3)+7;
+          best_bits=bits;
+          best_cost=cost;
+          qc=20;
+        }
+      }
+      else if(qc<=68){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT7;
+        best_eb=(-s<<5)+qc-37;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=36*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<best_cost){
+          best_token=token;
+          best_eb=(-s<<4)+15;
+          best_bits=bits;
+          best_cost=cost;
+          qc=36;
+        }
+      }
+      else{
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT8;
+        best_eb=(-s<<9)+qc-69;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=68*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<best_cost){
+          best_token=token;
+          best_eb=(-s<<5)+31;
+          best_bits=bits;
+          best_cost=cost;
+          qc=68;
+        }
+      }
+      zzj=zzi+1&63;
+      tj=best_flags>>zzj&1;
+      next=(zzj<<1)+tj;
+      tokens[zzi][1].next=(unsigned char)next;
+      tokens[zzi][1].token=(signed char)best_token;
+      tokens[zzi][1].eb=best_eb;
+      tokens[zzi][1].cost=best_cost+tokens[zzj][tj].cost;
+      tokens[zzi][1].bits=best_bits+tokens[zzj][tj].bits;
+      tokens[zzi][1].qc=qc+s^s;
+      nzflags|=(ogg_int64_t)1<<zzi;
+      best_flags|=(ogg_int64_t)1<<zzi;
+    }
+    zzj=zzi;
+  }
+  /*Emit the tokens from the best path through the trellis.*/
+  stack=*_stack;
+  /*We blow away the first entry here so that things vectorize better.
+    The DC coefficient is not actually stored in the array yet.*/
+  for(zzi=0;zzi<64;zzi++)_qdct[zzi]=0;
+  dct_fzig_zag=_enc->state.opt_data.dct_fzig_zag;
+  zzi=1;
+  ti=best_flags>>1&1;
+  bits=tokens[zzi][ti].bits;
+  do{
+    oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi);
+    eob=eob_run[zzi];
+    if(tokens[zzi][ti].token<OC_NDCT_EOB_TOKEN_MAX){
+      if(++eob>=4095){
+        oc_enc_eob_log(_enc,_pli,zzi,eob);
+        eob=0;
+      }
+      eob_run[zzi]=eob;
+      /*We don't include the actual EOB cost for this block in the return value.
+        It will be paid for by the fragment that terminates the EOB run.*/
+      bits-=tokens[zzi][ti].bits;
+      zzi=_zzi;
+      break;
+    }
+    /*Emit pending EOB run if any.*/
+    if(eob>0){
+      oc_enc_eob_log(_enc,_pli,zzi,eob);
+      eob_run[zzi]=0;
+    }
+    oc_enc_token_log(_enc,_pli,zzi,tokens[zzi][ti].token,tokens[zzi][ti].eb);
+    next=tokens[zzi][ti].next;
+    qc=tokens[zzi][ti].qc;
+    zzj=(next>>1)-1&63;
+    /*TODO: It may be worth saving the dequantized coefficient in the trellis
+       above; we had to compute it to measure the error anyway.*/
+    _qdct[dct_fzig_zag[zzj]]=(ogg_int16_t)(qc*(int)_dequant[zzj]);
+    zzi=next>>1;
+    ti=next&1;
+  }
+  while(zzi);
+  *_stack=stack;
+  return bits;
+}
+
+void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
+ int _pli,int _fragy0,int _frag_yend){/*Computes DC prediction residuals.
+  For every coded fragment in fragment rows _fragy0 (inclusive) through
+   _frag_yend (exclusive) of plane _pli, a predictor is subtracted from the
+   fragment's DC value and the difference is stored in _enc->frag_dc.
+  _enc->dc_pred_last[_pli] holds the last DC value seen for each reference
+   frame; it is updated for every coded fragment and is used as the predictor
+   when no neighbor qualifies.*/
+  const oc_fragment_plane *fplane;
+  const oc_fragment       *frags;
+  ogg_int16_t             *frag_dc;
+  ptrdiff_t                fragi;
+  int                     *pred_last;
+  int                      nhfrags;
+  int                      fragx;
+  int                      fragy;
+  fplane=_enc->state.fplanes+_pli;
+  frags=_enc->state.frags;
+  frag_dc=_enc->frag_dc;
+  pred_last=_enc->dc_pred_last[_pli];
+  nhfrags=fplane->nhfrags;
+  fragi=fplane->froffset+_fragy0*nhfrags;/*First fragment of row _fragy0.*/
+  for(fragy=_fragy0;fragy<_frag_yend;fragy++){
+    if(fragy==0){
+      /*For the first row, all of the cases reduce to just using the previous
+         predictor for the same reference frame.*/
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        if(frags[fragi].coded){
+          int ref;
+          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+          frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred_last[ref]);
+          pred_last[ref]=frags[fragi].dc;
+        }
+      }
+    }
+    else{
+      const oc_fragment *u_frags;
+      int                l_ref;
+      int                ul_ref;
+      int                u_ref;
+      u_frags=frags-nhfrags;/*u_frags[i] is the fragment one row above i.*/
+      l_ref=-1;/*-1 marks a neighbor that is absent or not coded.*/
+      ul_ref=-1;
+      u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1;
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        int ur_ref;
+        if(fragx+1>=nhfrags)ur_ref=-1;/*No up-right neighbor in the last column.*/
+        else{
+          ur_ref=u_frags[fragi+1].coded?
+           OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
+        }
+        if(frags[fragi].coded){
+          int pred;
+          int ref;
+          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+          /*We break out a separate case based on which of our neighbors use
+             the same reference frames.
+            This is somewhat faster than trying to make a generic case which
+             handles all of them, since it reduces lots of poorly predicted
+             jumps to one switch statement, and also lets a number of the
+             multiplications be optimized out by strength reduction.
+            The switch value is a bit mask of the usable neighbors:
+             bit 0 is left, bit 1 is up-left, bit 2 is up, bit 3 is up-right.*/
+          switch((l_ref==ref)|(ul_ref==ref)<<1|
+           (u_ref==ref)<<2|(ur_ref==ref)<<3){
+            default:pred=pred_last[ref];break;/*Mask 0: no usable neighbor.*/
+            case  1:
+            case  3:pred=frags[fragi-1].dc;break;
+            case  2:pred=u_frags[fragi-1].dc;break;
+            case  4:
+            case  6:
+            case 12:pred=u_frags[fragi].dc;break;
+            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
+            case  8:pred=u_frags[fragi+1].dc;break;
+            case  9:
+            case 11:
+            case 13:{
+              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
+            }break;
+            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
+            case 14:{
+              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
+               +10*u_frags[fragi].dc)/16;
+            }break;
+            case  7:
+            case 15:{
+              int p0;
+              int p1;
+              int p2;
+              p0=frags[fragi-1].dc;
+              p1=u_frags[fragi-1].dc;
+              p2=u_frags[fragi].dc;
+              pred=(29*(p0+p2)-26*p1)/32;/*If this strays more than 128 from a
+                neighbor, that neighbor is used instead; up is tested first,
+                then left, then up-left.*/
+              if(abs(pred-p2)>128)pred=p2;
+              else if(abs(pred-p0)>128)pred=p0;
+              else if(abs(pred-p1)>128)pred=p1;
+            }break;
+          }
+          frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred);
+          pred_last[ref]=frags[fragi].dc;
+          l_ref=ref;
+        }
+        else l_ref=-1;
+        ul_ref=u_ref;/*Slide the upper neighbors one fragment to the right.*/
+        u_ref=ur_ref;
+      }
+    }
+  }
+}
+
+/*Tokenizes the DC coefficients for a list of coded fragments.
+  The DC residuals are read from _enc->frag_dc and appended to token stack 0
+   of plane _pli.
+  Whenever a DC value is zero, the matching entry of stack 1 (the first AC
+   coefficient) is pulled into stack 0, merged with it where possible, and
+   stack 1 is rewritten in place.
+  On return the token counts and pending EOB runs for stacks 0 and 1 are
+   stored back into _enc.
+  _enc:               The encoder context.
+  _pli:               The plane index.
+  _coded_fragis:      The indices of the coded fragments to tokenize.
+  _ncoded_fragis:     The number of entries in _coded_fragis.
+  _prev_ndct_tokens1: The index in stack 1 at which reading and rewriting
+                       starts.
+  _prev_eob_run1:     If positive, the token at that index is an EOB run, and
+                       this many of its EOBs came before these fragments.*/
+void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli,
+ const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis,
+ int _prev_ndct_tokens1,int _prev_eob_run1){
+  const ogg_int16_t *frag_dc;
+  ptrdiff_t          fragii;
+  unsigned char     *dct_tokens0;
+  unsigned char     *dct_tokens1;
+  ogg_uint16_t      *extra_bits0;
+  ogg_uint16_t      *extra_bits1;
+  ptrdiff_t          ti0;
+  ptrdiff_t          ti1r;
+  ptrdiff_t          ti1w;
+  int                eob_run0;
+  int                eob_run1;
+  int                neobs1;
+  int                token;
+  int                eb;
+  /*token1 and eb1 are always loaded from stack 1 before they are read: either
+     in the _prev_eob_run1 branch below, or on the first loop iteration, where
+     neobs1 is zero.
+    They are zeroed explicitly instead of being self-initialized, since
+     self-initialization reads an indeterminate value.*/
+  int                token1=0;
+  int                eb1=0;
+  /*Return immediately if there are no coded fragments; otherwise we'd flush
+     any trailing EOB run into the AC 1 list and never read it back out.*/
+  if(_ncoded_fragis<=0)return;
+  frag_dc=_enc->frag_dc;
+  dct_tokens0=_enc->dct_tokens[_pli][0];
+  dct_tokens1=_enc->dct_tokens[_pli][1];
+  extra_bits0=_enc->extra_bits[_pli][0];
+  extra_bits1=_enc->extra_bits[_pli][1];
+  ti0=_enc->ndct_tokens[_pli][0];
+  ti1w=ti1r=_prev_ndct_tokens1;
+  eob_run0=_enc->eob_run[_pli][0];
+  /*Flush any trailing EOB run for the 1st AC coefficient.
+    This is needed to allow us to track tokens to the end of the list.*/
+  eob_run1=_enc->eob_run[_pli][1];
+  if(eob_run1>0)oc_enc_eob_log(_enc,_pli,1,eob_run1);
+  /*If there was an active EOB run at the start of the 1st AC stack, read it
+     in and decode it.*/
+  if(_prev_eob_run1>0){
+    token1=dct_tokens1[ti1r];
+    eb1=extra_bits1[ti1r];
+    ti1r++;
+    eob_run1=oc_decode_eob_token(token1,eb1);
+    /*Consume the portion of the run that came before these fragments.*/
+    neobs1=eob_run1-_prev_eob_run1;
+  }
+  else eob_run1=neobs1=0;
+  for(fragii=0;fragii<_ncoded_fragis;fragii++){
+    int val;
+    /*All tokens in the 1st AC coefficient stack are regenerated as the DC
+       coefficients are produced.
+      This can be done in-place; stack 1 cannot get larger.*/
+    if(!neobs1){
+      /*There's no active EOB run in stack 1; read the next token.*/
+      token1=dct_tokens1[ti1r];
+      eb1=extra_bits1[ti1r];
+      ti1r++;
+      if(token1<OC_NDCT_EOB_TOKEN_MAX){
+        neobs1=oc_decode_eob_token(token1,eb1);
+        /*It's an EOB run; add it to the current (inactive) one.
+          Because we may have moved entries to stack 0, we may have an
+           opportunity to merge two EOB runs in stack 1.*/
+        eob_run1+=neobs1;
+      }
+    }
+    val=frag_dc[_coded_fragis[fragii]];
+    if(val){
+      /*There was a non-zero DC value, so there's no alteration to stack 1
+         for this fragment; just code the stack 0 token.*/
+      /*Flush any pending EOB run.*/
+      if(eob_run0>0){
+        token=oc_make_eob_token_full(eob_run0,&eb);
+        dct_tokens0[ti0]=(unsigned char)token;
+        extra_bits0[ti0]=(ogg_uint16_t)eb;
+        ti0++;
+        eob_run0=0;
+      }
+      token=oc_make_dct_token_full(0,0,val,&eb);
+      dct_tokens0[ti0]=(unsigned char)token;
+      extra_bits0[ti0]=(ogg_uint16_t)eb;
+      ti0++;
+    }
+    else{
+      /*Zero DC value; that means the entry in stack 1 might need to be coded
+         from stack 0.
+        This requires a stack 1 fixup.*/
+      if(neobs1>0){
+        /*We're in the middle of an active EOB run in stack 1.
+          Move it to stack 0.*/
+        if(++eob_run0>=4095){
+          token=oc_make_eob_token_full(eob_run0,&eb);
+          dct_tokens0[ti0]=(unsigned char)token;
+          extra_bits0[ti0]=(ogg_uint16_t)eb;
+          ti0++;
+          eob_run0=0;
+        }
+        eob_run1--;
+      }
+      else{
+        /*No active EOB run in stack 1, so we can't extend one in stack 0.
+          Flush it if we've got it.*/
+        if(eob_run0>0){
+          token=oc_make_eob_token_full(eob_run0,&eb);
+          dct_tokens0[ti0]=(unsigned char)token;
+          extra_bits0[ti0]=(ogg_uint16_t)eb;
+          ti0++;
+          eob_run0=0;
+        }
+        /*Stack 1 token is one of: a pure zero run token, a single
+           coefficient token, or a zero run/coefficient combo token.
+          A zero run token is expanded and moved to token stack 0, and the
+           stack 1 entry dropped.
+          A single coefficient value may be transformed into combo token that
+           is moved to stack 0, or if it cannot be combined, it is left alone
+           and a single length-1 zero run is emitted in stack 0.
+          A combo token is extended and moved to stack 0.
+          During AC coding, we restrict the run lengths on combo tokens for
+           stack 1 to guarantee we can extend them.
+          Every case that ends in continue skips the stack 1 write-back at
+           the bottom of the loop, which is what drops the stack 1 entry.*/
+        switch(token1){
+          case OC_DCT_SHORT_ZRL_TOKEN:{
+            if(eb1<7){
+              dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN;
+              extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+              ti0++;
+              /*Don't write the AC coefficient back out.*/
+              continue;
+            }
+            /*Fall through.*/
+          }
+          case OC_DCT_ZRL_TOKEN:{
+            dct_tokens0[ti0]=OC_DCT_ZRL_TOKEN;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_ONE_TOKEN:
+          case OC_MINUS_ONE_TOKEN:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT1A;
+            extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_ONE_TOKEN);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_TWO_TOKEN:
+          case OC_MINUS_TWO_TOKEN:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT2A;
+            extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_TWO_TOKEN<<1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_VAL_CAT2:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT2A;
+            extra_bits0[ti0]=(ogg_uint16_t)((eb1<<1)+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT1A:
+          case OC_DCT_RUN_CAT1A+1:
+          case OC_DCT_RUN_CAT1A+2:
+          case OC_DCT_RUN_CAT1A+3:{
+            dct_tokens0[ti0]=(unsigned char)(token1+1);
+            extra_bits0[ti0]=(ogg_uint16_t)eb1;
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT1A+4:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT1B;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1<<2);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT1B:{
+            if((eb1&3)<3){
+              dct_tokens0[ti0]=OC_DCT_RUN_CAT1B;
+              extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+              ti0++;
+              /*Don't write the AC coefficient back out.*/
+              continue;
+            }
+            eb1=((eb1&4)<<1)-1;
+            /*Fall through.*/
+          }
+          case OC_DCT_RUN_CAT1C:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT1C;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT2A:{
+            eb1=(eb1<<1)-1;
+            /*Fall through.*/
+          }
+          case OC_DCT_RUN_CAT2B:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT2B;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+        }
+        /*We can't merge tokens, write a short zero run and keep going.*/
+        dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN;
+        extra_bits0[ti0]=0;
+        ti0++;
+      }
+    }
+    if(!neobs1){
+      /*Flush any (inactive) EOB run.*/
+      if(eob_run1>0){
+        token=oc_make_eob_token_full(eob_run1,&eb);
+        dct_tokens1[ti1w]=(unsigned char)token;
+        extra_bits1[ti1w]=(ogg_uint16_t)eb;
+        ti1w++;
+        eob_run1=0;
+      }
+      /*There's no active EOB run, so log the current token.*/
+      dct_tokens1[ti1w]=(unsigned char)token1;
+      extra_bits1[ti1w]=(ogg_uint16_t)eb1;
+      ti1w++;
+    }
+    else{
+      /*Otherwise consume one EOB from the current run.*/
+      neobs1--;
+      /*If we have more than 4095 EOBs outstanding in stack1, flush the run.*/
+      if(eob_run1-neobs1>=4095){
+        token=oc_make_eob_token_full(4095,&eb);
+        dct_tokens1[ti1w]=(unsigned char)token;
+        extra_bits1[ti1w]=(ogg_uint16_t)eb;
+        ti1w++;
+        eob_run1-=4095;
+      }
+    }
+  }
+  /*Save the current state.*/
+  _enc->ndct_tokens[_pli][0]=ti0;
+  _enc->ndct_tokens[_pli][1]=ti1w;
+  _enc->eob_run[_pli][0]=eob_run0;
+  _enc->eob_run[_pli][1]=eob_run1;
+}
+
+/*Final EOB run welding.
+  First, every EOB run still pending in _enc->eob_run is logged.
+  Then, for each token list that starts with an EOB run token, we try to fold
+   that token into the EOB run token that ends the nearest preceding non-empty
+   token list.
+  A merged leading token is skipped by advancing _enc->dct_token_offs; no
+   tokens are moved.
+  _enc: The encoder context.*/
+void oc_enc_tokenize_finish(oc_enc_ctx *_enc){
+  int pli;
+  int zzi;
+  /*Emit final EOB runs.*/
+  for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
+    int eob_run;
+    eob_run=_enc->eob_run[pli][zzi];
+    if(eob_run>0)oc_enc_eob_log(_enc,pli,zzi,eob_run);
+  }
+  /*Merge the final EOB run of one token list with the start of the next, if
+     possible.*/
+  for(zzi=0;zzi<64;zzi++)for(pli=0;pli<3;pli++){
+    int       old_tok1;
+    int       old_tok2;
+    int       old_eb1;
+    int       old_eb2;
+    int       new_tok;
+    int       new_eb;
+    int       zzj;
+    int       plj;
+    /*ti is only read after the search below has stored into it (old_tok1 is
+       only changed together with ti).
+      It is zeroed explicitly instead of being self-initialized, since
+       self-initialization reads an indeterminate value.*/
+    ptrdiff_t ti=0;
+    int       run_count;
+    /*Make sure this coefficient has tokens at all.*/
+    if(_enc->ndct_tokens[pli][zzi]<=0)continue;
+    /*Ensure the first token is an EOB run.*/
+    old_tok2=_enc->dct_tokens[pli][zzi][0];
+    if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue;
+    /*Search for a previous coefficient that has any tokens at all.
+      We first look at the earlier planes of this coefficient, then at all
+       three planes of each earlier coefficient, from the last plane down.*/
+    old_tok1=OC_NDCT_EOB_TOKEN_MAX;
+    for(zzj=zzi,plj=pli;zzj>=0;zzj--){
+      while(plj-->0){
+        ti=_enc->ndct_tokens[plj][zzj]-1;
+        if(ti>=_enc->dct_token_offs[plj][zzj]){
+          old_tok1=_enc->dct_tokens[plj][zzj][ti];
+          break;
+        }
+      }
+      /*plj is only non-negative here if the inner loop found a list.*/
+      if(plj>=0)break;
+      plj=3;
+    }
+    /*Ensure its last token was an EOB run.*/
+    if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue;
+    /*Pull off the associated extra bits, if any, and decode the runs.*/
+    old_eb1=_enc->extra_bits[plj][zzj][ti];
+    old_eb2=_enc->extra_bits[pli][zzi][0];
+    run_count=oc_decode_eob_token(old_tok1,old_eb1)
+     +oc_decode_eob_token(old_tok2,old_eb2);
+    /*We can't possibly combine these into one run.
+      It might be possible to split them more optimally, but we'll just leave
+       them as-is.*/
+    if(run_count>=4096)continue;
+    /*We CAN combine them into one run.*/
+    new_tok=oc_make_eob_token_full(run_count,&new_eb);
+    _enc->dct_tokens[plj][zzj][ti]=(unsigned char)new_tok;
+    _enc->extra_bits[plj][zzj][ti]=(ogg_uint16_t)new_eb;
+    _enc->dct_token_offs[pli][zzi]++;
+  }
+}

Copied: trunk/theora/lib/x86/mmxencfrag.c (from rev 16442, trunk/theora/lib/enc/x86/mmxencfrag.c)
===================================================================
--- trunk/theora/lib/x86/mmxencfrag.c	                        (rev 0)
+++ trunk/theora/lib/x86/mmxencfrag.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,900 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: dsp_mmx.c 14579 2008-03-12 06:42:40Z xiphmont $
+
+ ********************************************************************/
+#include <stddef.h>
+#include "x86enc.h"
+
+#if defined(OC_X86_ASM)
+
+unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){/*Computes the sum of absolute
+   differences (SAD) between two 8x8 blocks of bytes using psadbw.
+  _src:     The first block.
+  _ref:     The second block.
+  _ystride: The offset in bytes between rows; it is applied to both blocks.
+  Return: The SAD, accumulated in mm0 and moved out through [ret].
+  NOTE(review): no emms is issued here; presumably the caller is responsible
+   for clearing the MMX state - confirm.*/
+  ptrdiff_t ystride3;
+  ptrdiff_t ret;
+  __asm__ __volatile__(
+    /*Load the first 4 rows of each block.*/
+    "movq (%[src]),%%mm0\n\t"
+    "movq (%[ref]),%%mm1\n\t"
+    "movq (%[src],%[ystride]),%%mm2\n\t"
+    "movq (%[ref],%[ystride]),%%mm3\n\t"
+    "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" /*ystride3=3*ystride*/
+    "movq (%[src],%[ystride],2),%%mm4\n\t"
+    "movq (%[ref],%[ystride],2),%%mm5\n\t"
+    "movq (%[src],%[ystride3]),%%mm6\n\t"
+    "movq (%[ref],%[ystride3]),%%mm7\n\t"
+    /*Compute their SADs and add them in %%mm0*/
+    "psadbw %%mm1,%%mm0\n\t"
+    "psadbw %%mm3,%%mm2\n\t"
+    "lea (%[src],%[ystride],4),%[src]\n\t" /*Advance src by 4 rows.*/
+    "paddw %%mm2,%%mm0\n\t"
+    "lea (%[ref],%[ystride],4),%[ref]\n\t" /*Advance ref by 4 rows.*/
+    /*Load the next 3 rows as registers become available.*/
+    "movq (%[src]),%%mm2\n\t"
+    "movq (%[ref]),%%mm3\n\t"
+    "psadbw %%mm5,%%mm4\n\t"
+    "psadbw %%mm7,%%mm6\n\t"
+    "paddw %%mm4,%%mm0\n\t"
+    "movq (%[ref],%[ystride]),%%mm5\n\t"
+    "movq (%[src],%[ystride]),%%mm4\n\t"
+    "paddw %%mm6,%%mm0\n\t"
+    "movq (%[ref],%[ystride],2),%%mm7\n\t"
+    "movq (%[src],%[ystride],2),%%mm6\n\t"
+    /*Start adding their SADs to %%mm0*/
+    "psadbw %%mm3,%%mm2\n\t"
+    "psadbw %%mm5,%%mm4\n\t"
+    "paddw %%mm2,%%mm0\n\t"
+    "psadbw %%mm7,%%mm6\n\t"
+    /*Load last row as registers become available.*/
+    "movq (%[src],%[ystride3]),%%mm2\n\t"
+    "movq (%[ref],%[ystride3]),%%mm3\n\t"
+    /*And finish adding up their SADs.*/
+    "paddw %%mm4,%%mm0\n\t"
+    "psadbw %%mm3,%%mm2\n\t"
+    "paddw %%mm6,%%mm0\n\t"
+    "paddw %%mm2,%%mm0\n\t"
+    "movd %%mm0,%[ret]\n\t"
+    :[ret]"=a"(ret),[src]"+%r"(_src),[ref]"+r"(_ref),[ystride3]"=&r"(ystride3)
+    :[ystride]"r"((ptrdiff_t)_ystride)
+  );
+  return (unsigned)ret;
+}
+
+/*SAD with an early-termination threshold in its signature.
+  This version never terminates early: _thresh is ignored and the complete
+   SAD of the two blocks is always returned.
+  _src:     The first block.
+  _ref:     The second block.
+  _ystride: The offset in bytes between rows.
+  _thresh:  Ignored.
+  Return: The value of oc_enc_frag_sad_mmxext() for the same blocks.*/
+unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+  unsigned sad;
+  /*The threshold is deliberately unused.*/
+  (void)_thresh;
+  sad=oc_enc_frag_sad_mmxext(_src,_ref,_ystride);
+  return sad;
+}
+
+/*Adds to %[ret] the SAD of two rows of %[src] against the average (rounded
+   down) of the matching rows of %[ref1] and %[ref2].
+  Assumes the first two rows of %[ref1] and %[ref2] are in %%mm0...%%mm3, the
+   first two rows of %[src] are in %%mm4,%%mm5, and {1}x8 is in %%mm7.
+  We pre-load the next two rows of data as registers become available, which
+   leaves %[src], %[ref1] and %[ref2] advanced by two rows.
+  %%mm6 is used as scratch.*/
+#define OC_SAD2_LOOP \
+ "#OC_SAD2_LOOP\n\t" \
+ /*We want to compute (%%mm0+%%mm1>>1) on unsigned bytes without overflow, but \
+    pavgb computes (%%mm0+%%mm1+1>>1). \
+   The latter is exactly 1 too large when the low bit of two corresponding \
+    bytes is only set in one of them. \
+   Therefore we pxor the operands, pand with %%mm7 to keep only the low bits, \
+    and psubb to correct the output of pavgb.*/ \
+ "movq %%mm0,%%mm6\n\t" \
+ "lea (%[ref1],%[ystride],2),%[ref1]\n\t" \
+ "pxor %%mm1,%%mm0\n\t" \
+ "pavgb %%mm1,%%mm6\n\t" \
+ "lea (%[ref2],%[ystride],2),%[ref2]\n\t" \
+ "movq %%mm2,%%mm1\n\t" \
+ "pand %%mm7,%%mm0\n\t" \
+ "pavgb %%mm3,%%mm2\n\t" \
+ "pxor %%mm3,%%mm1\n\t" \
+ "movq (%[ref2],%[ystride]),%%mm3\n\t" \
+ "psubb %%mm0,%%mm6\n\t" \
+ "movq (%[ref1]),%%mm0\n\t" \
+ "pand %%mm7,%%mm1\n\t" \
+ "psadbw %%mm6,%%mm4\n\t" \
+ "movd %[ret],%%mm6\n\t" /*Fetch the running total.*/ \
+ "psubb %%mm1,%%mm2\n\t" \
+ "movq (%[ref2]),%%mm1\n\t" \
+ "lea (%[src],%[ystride],2),%[src]\n\t" \
+ "psadbw %%mm2,%%mm5\n\t" \
+ "movq (%[ref1],%[ystride]),%%mm2\n\t" \
+ "paddw %%mm4,%%mm5\n\t" \
+ "movq (%[src]),%%mm4\n\t" \
+ "paddw %%mm5,%%mm6\n\t" \
+ "movq (%[src],%[ystride]),%%mm5\n\t" \
+ "movd %%mm6,%[ret]\n\t" /*Store the updated total.*/ \
+
+/*Same as above, but does not pre-load the next two rows.
+  No pointer is advanced here, and the corrected averages are formed in %%mm0
+   and %%mm2 (with %%mm6 and %%mm1 holding the low-bit corrections).*/
+#define OC_SAD2_TAIL \
+ "#OC_SAD2_TAIL\n\t" \
+ "movq %%mm0,%%mm6\n\t" \
+ "pavgb %%mm1,%%mm0\n\t" \
+ "pxor %%mm1,%%mm6\n\t" \
+ "movq %%mm2,%%mm1\n\t" \
+ "pand %%mm7,%%mm6\n\t" \
+ "pavgb %%mm3,%%mm2\n\t" \
+ "pxor %%mm3,%%mm1\n\t" \
+ "psubb %%mm6,%%mm0\n\t" \
+ "pand %%mm7,%%mm1\n\t" \
+ "psadbw %%mm0,%%mm4\n\t" \
+ "psubb %%mm1,%%mm2\n\t" \
+ "movd %[ret],%%mm6\n\t" /*Fetch the running total.*/ \
+ "psadbw %%mm2,%%mm5\n\t" \
+ "paddw %%mm4,%%mm5\n\t" \
+ "paddw %%mm5,%%mm6\n\t" \
+ "movd %%mm6,%[ret]\n\t" /*Store the final total.*/ \
+
+unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){/*Computes the SAD between an 8x8 block and the average
+   (rounded down) of two 8x8 reference blocks, two rows at a time.
+  _src:     The block to compare against.
+  _ref1:    The first reference block.
+  _ref2:    The second reference block.
+  _ystride: The offset in bytes between rows; it is applied to all three
+             blocks.
+  _thresh:  Not referenced; no early termination is attempted.
+  Return: The accumulated SAD.*/
+  ptrdiff_t ret;
+  __asm__ __volatile__(
+    "movq (%[ref1]),%%mm0\n\t"
+    "movq (%[ref2]),%%mm1\n\t"
+    "movq (%[ref1],%[ystride]),%%mm2\n\t"
+    "movq (%[ref2],%[ystride]),%%mm3\n\t"
+    "xor %[ret],%[ret]\n\t" /*ret=0*/
+    "movq (%[src]),%%mm4\n\t"
+    "pxor %%mm7,%%mm7\n\t"
+    "pcmpeqb %%mm6,%%mm6\n\t" /*mm6={0xFF}x8*/
+    "movq (%[src],%[ystride]),%%mm5\n\t"
+    "psubb %%mm6,%%mm7\n\t" /*mm7=0-(-1)={1}x8*/
+    OC_SAD2_LOOP /*Rows 0 and 1; loads rows 2 and 3.*/
+    OC_SAD2_LOOP /*Rows 2 and 3; loads rows 4 and 5.*/
+    OC_SAD2_LOOP /*Rows 4 and 5; loads rows 6 and 7.*/
+    OC_SAD2_TAIL /*Rows 6 and 7.*/
+    :[ret]"=&a"(ret),[src]"+r"(_src),[ref1]"+%r"(_ref1),[ref2]"+r"(_ref2)
+    :[ystride]"r"((ptrdiff_t)_ystride)
+  );
+  return (unsigned)ret;
+}
+
+/*Load an 8x4 array of pixel values from %[src] and %[ref] and compute their
+   16-bit difference in %%mm0...%%mm7.
+  Each register receives one row: 4 pixels starting at byte offset _off.
+  _off must be a string literal; it is pasted in as the displacement of every
+   load, and _off*2 in %[buf] is used to spill %%mm0 temporarily.
+  %[src] and %[ref] are stepped through all 8 rows and then restored, with
+   %[src_ystride] and %[ref_ystride] negated and un-negated along the way.*/
+#define OC_LOAD_SUB_8x4(_off) \
+ "#OC_LOAD_SUB_8x4\n\t" \
+ "movd "_off"(%[src]),%%mm0\n\t" \
+ "movd "_off"(%[ref]),%%mm4\n\t" \
+ "movd "_off"(%[src],%[src_ystride]),%%mm1\n\t" \
+ "lea (%[src],%[src_ystride],2),%[src]\n\t" \
+ "movd "_off"(%[ref],%[ref_ystride]),%%mm5\n\t" \
+ "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \
+ "movd "_off"(%[src]),%%mm2\n\t" \
+ "movd "_off"(%[ref]),%%mm7\n\t" \
+ "movd "_off"(%[src],%[src_ystride]),%%mm3\n\t" \
+ "movd "_off"(%[ref],%[ref_ystride]),%%mm6\n\t" \
+ "punpcklbw %%mm4,%%mm0\n\t" /*Words of src+256*ref.*/ \
+ "lea (%[src],%[src_ystride],2),%[src]\n\t" \
+ "punpcklbw %%mm4,%%mm4\n\t" /*Words of ref+256*ref.*/ \
+ "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \
+ "psubw %%mm4,%%mm0\n\t" /*Leaves src-ref in each word.*/ \
+ "movd "_off"(%[src]),%%mm4\n\t" \
+ "movq %%mm0,"_off"*2(%[buf])\n\t" /*Spill row 0 to free %%mm0.*/ \
+ "movd "_off"(%[ref]),%%mm0\n\t" \
+ "punpcklbw %%mm5,%%mm1\n\t" \
+ "punpcklbw %%mm5,%%mm5\n\t" \
+ "psubw %%mm5,%%mm1\n\t" \
+ "movd "_off"(%[src],%[src_ystride]),%%mm5\n\t" \
+ "punpcklbw %%mm7,%%mm2\n\t" \
+ "punpcklbw %%mm7,%%mm7\n\t" \
+ "psubw %%mm7,%%mm2\n\t" \
+ "movd "_off"(%[ref],%[ref_ystride]),%%mm7\n\t" \
+ "punpcklbw %%mm6,%%mm3\n\t" \
+ "lea (%[src],%[src_ystride],2),%[src]\n\t" \
+ "punpcklbw %%mm6,%%mm6\n\t" \
+ "psubw %%mm6,%%mm3\n\t" \
+ "movd "_off"(%[src]),%%mm6\n\t" \
+ "punpcklbw %%mm0,%%mm4\n\t" \
+ "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \
+ "punpcklbw %%mm0,%%mm0\n\t" \
+ "lea (%[src],%[src_ystride],2),%[src]\n\t" \
+ "psubw %%mm0,%%mm4\n\t" \
+ "movd "_off"(%[ref]),%%mm0\n\t" \
+ "punpcklbw %%mm7,%%mm5\n\t" \
+ "neg %[src_ystride]\n\t" /*src is now 8 rows down; step back for row 7.*/ \
+ "punpcklbw %%mm7,%%mm7\n\t" \
+ "psubw %%mm7,%%mm5\n\t" \
+ "movd "_off"(%[src],%[src_ystride]),%%mm7\n\t" \
+ "punpcklbw %%mm0,%%mm6\n\t" \
+ "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \
+ "punpcklbw %%mm0,%%mm0\n\t" \
+ "neg %[ref_ystride]\n\t" /*ref is now 8 rows down; step back for row 7.*/ \
+ "psubw %%mm0,%%mm6\n\t" \
+ "movd "_off"(%[ref],%[ref_ystride]),%%mm0\n\t" \
+ "lea (%[src],%[src_ystride],8),%[src]\n\t" /*Restore src.*/ \
+ "punpcklbw %%mm0,%%mm7\n\t" \
+ "neg %[src_ystride]\n\t" /*Restore src_ystride.*/ \
+ "punpcklbw %%mm0,%%mm0\n\t" \
+ "lea (%[ref],%[ref_ystride],8),%[ref]\n\t" /*Restore ref.*/ \
+ "psubw %%mm0,%%mm7\n\t" \
+ "neg %[ref_ystride]\n\t" /*Restore ref_ystride.*/ \
+ "movq "_off"*2(%[buf]),%%mm0\n\t" /*Reload row 0.*/ \
+
+/*Load an 8x4 array of pixel values from %[src] into %%mm0...%%mm7.
+  Each register receives one row: 4 bytes starting at offset _off (a string
+   literal), zero-extended to 16-bit words.
+  The first four rows are addressed from %[src] and the last four from
+   %[src4]; presumably %[src4] is %[src] plus 4 rows and %[ystride3] is
+   3*%[ystride] (TODO confirm against the callers).*/
+#define OC_LOAD_8x4(_off) \
+ "#OC_LOAD_8x4\n\t" \
+ "movd "_off"(%[src]),%%mm0\n\t" \
+ "movd "_off"(%[src],%[ystride]),%%mm1\n\t" \
+ "movd "_off"(%[src],%[ystride],2),%%mm2\n\t" \
+ "pxor %%mm7,%%mm7\n\t" /*Zero, for unpacking the first four rows.*/ \
+ "movd "_off"(%[src],%[ystride3]),%%mm3\n\t" \
+ "punpcklbw %%mm7,%%mm0\n\t" \
+ "movd "_off"(%[src4]),%%mm4\n\t" \
+ "punpcklbw %%mm7,%%mm1\n\t" \
+ "movd "_off"(%[src4],%[ystride]),%%mm5\n\t" \
+ "punpcklbw %%mm7,%%mm2\n\t" \
+ "movd "_off"(%[src4],%[ystride],2),%%mm6\n\t" \
+ "punpcklbw %%mm7,%%mm3\n\t" \
+ "movd "_off"(%[src4],%[ystride3]),%%mm7\n\t" /*%%mm7 is no longer zero.*/ \
+ "punpcklbw %%mm4,%%mm4\n\t" /*Duplicate each byte, then shift it down.*/ \
+ "punpcklbw %%mm5,%%mm5\n\t" \
+ "psrlw $8,%%mm4\n\t" \
+ "psrlw $8,%%mm5\n\t" \
+ "punpcklbw %%mm6,%%mm6\n\t" \
+ "punpcklbw %%mm7,%%mm7\n\t" \
+ "psrlw $8,%%mm6\n\t" \
+ "psrlw $8,%%mm7\n\t" \
+
+/*Performs the first two stages of an 8-point 1-D Hadamard transform.
+  The 8 inputs are %%mm0...%%mm7; all 16-bit words of a register are
+   processed in parallel.
+  The transform is performed in place, except that outputs 0-3 are swapped with
+   outputs 4-7.
+  Outputs 2, 3, 6 and 7 from the second stage are negated (which allows us to
+   perform this stage in place with no temporary registers).*/
+#define OC_HADAMARD_AB_8x4 \
+ "#OC_HADAMARD_AB_8x4\n\t" \
+ /*Stage A: \
+   Outputs 0-3 are swapped with 4-7 here. \
+   Each butterfly is b+=a; a+=a; a-=b, which leaves the sum in b and the \
+    original a minus the original b in a.*/ \
+ "paddw %%mm1,%%mm5\n\t" \
+ "paddw %%mm2,%%mm6\n\t" \
+ "paddw %%mm1,%%mm1\n\t" \
+ "paddw %%mm2,%%mm2\n\t" \
+ "psubw %%mm5,%%mm1\n\t" \
+ "psubw %%mm6,%%mm2\n\t" \
+ "paddw %%mm3,%%mm7\n\t" \
+ "paddw %%mm0,%%mm4\n\t" \
+ "paddw %%mm3,%%mm3\n\t" \
+ "paddw %%mm0,%%mm0\n\t" \
+ "psubw %%mm7,%%mm3\n\t" \
+ "psubw %%mm4,%%mm0\n\t" \
+ /*Stage B:*/ \
+ "paddw %%mm2,%%mm0\n\t" \
+ "paddw %%mm3,%%mm1\n\t" \
+ "paddw %%mm6,%%mm4\n\t" \
+ "paddw %%mm7,%%mm5\n\t" \
+ "paddw %%mm2,%%mm2\n\t" \
+ "paddw %%mm3,%%mm3\n\t" \
+ "paddw %%mm6,%%mm6\n\t" \
+ "paddw %%mm7,%%mm7\n\t" \
+ "psubw %%mm0,%%mm2\n\t" \
+ "psubw %%mm1,%%mm3\n\t" \
+ "psubw %%mm4,%%mm6\n\t" \
+ "psubw %%mm5,%%mm7\n\t" \
+
+/*Performs the last stage of an 8-point 1-D Hadamard transform in place.
+  Register pairs (%%mm0,%%mm1), (%%mm2,%%mm3), (%%mm4,%%mm5) and (%%mm6,%%mm7)
+   are each replaced by their sum and a difference.
+  Outputs 1, 3, 5, and 7 are negated (which allows us to perform this stage in
+   place with no temporary registers).*/
+#define OC_HADAMARD_C_8x4 \
+ "#OC_HADAMARD_C_8x4\n\t" \
+ /*Stage C:*/ \
+ "paddw %%mm1,%%mm0\n\t" \
+ "paddw %%mm3,%%mm2\n\t" \
+ "paddw %%mm5,%%mm4\n\t" \
+ "paddw %%mm7,%%mm6\n\t" \
+ "paddw %%mm1,%%mm1\n\t" \
+ "paddw %%mm3,%%mm3\n\t" \
+ "paddw %%mm5,%%mm5\n\t" \
+ "paddw %%mm7,%%mm7\n\t" \
+ "psubw %%mm0,%%mm1\n\t" \
+ "psubw %%mm2,%%mm3\n\t" \
+ "psubw %%mm4,%%mm5\n\t" \
+ "psubw %%mm6,%%mm7\n\t" \
+
+/*Performs an 8-point 1-D Hadamard transform.
+  This is OC_HADAMARD_AB_8x4 followed by OC_HADAMARD_C_8x4.
+  The transform is performed in place, except that outputs 0-3 are swapped with
+   outputs 4-7.
+  Outputs 1, 2, 5 and 6 are negated (which allows us to perform the transform
+   in place with no temporary registers); outputs 3 and 7 are negated by both
+   halves, so their signs cancel.*/
+#define OC_HADAMARD_8x4 \
+ OC_HADAMARD_AB_8x4 \
+ OC_HADAMARD_C_8x4 \
+
+/*Performs the first part of the final stage of the Hadamard transform and
+   summing of absolute values.
+  _r6 and _r7 are string literal offsets into %[buf]; %%mm6 and %%mm7 are
+   spilled there so the two registers can be used as temporaries.
+  On exit %%mm3 holds the value spilled from %%mm7, while the value spilled
+   from %%mm6 is still in %[buf].
+  At the end of this part, %%mm1 will contain the DC coefficient of the
+   transform.*/
+#define OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \
+ /*We use the fact that \
+     (abs(a+b)+abs(a-b))/2=max(abs(a),abs(b)) \
+    to merge the final butterfly with the abs and the first stage of \
+    accumulation. \
+   Thus we can avoid using pabsw, which is not available until SSSE3. \
+   Emulating pabsw takes 3 instructions, so the straightforward MMXEXT \
+    implementation would be (3+3)*8+7=55 instructions (+4 for spilling \
+    registers). \
+   Even with pabsw, it would be (3+1)*8+7=39 instructions (with no spills). \
+   This implementation is only 26 (+4 for spilling registers).*/ \
+ "#OC_HADAMARD_C_ABS_ACCUM_A_8x4\n\t" \
+ "movq %%mm7,"_r7"(%[buf])\n\t" \
+ "movq %%mm6,"_r6"(%[buf])\n\t" \
+ /*mm7={0x7FFF}x4 \
+   mm0=max(abs(mm0),abs(mm1))-0x7FFF*/ \
+ "pcmpeqb %%mm7,%%mm7\n\t" \
+ "movq %%mm0,%%mm6\n\t" \
+ "psrlw $1,%%mm7\n\t" \
+ "paddw %%mm1,%%mm6\n\t" \
+ "pmaxsw %%mm1,%%mm0\n\t" \
+ "paddsw %%mm7,%%mm6\n\t" \
+ "psubw %%mm6,%%mm0\n\t" \
+ /*mm2=max(abs(mm2),abs(mm3))-0x7FFF \
+   mm4=max(abs(mm4),abs(mm5))-0x7FFF \
+   Only the pmaxsw and paddw steps are done here; the saturating add and \
+    the subtraction for these two pairs are left to the second part.*/ \
+ "movq %%mm2,%%mm6\n\t" \
+ "movq %%mm4,%%mm1\n\t" \
+ "pmaxsw %%mm3,%%mm2\n\t" \
+ "pmaxsw %%mm5,%%mm4\n\t" \
+ "paddw %%mm3,%%mm6\n\t" \
+ "paddw %%mm5,%%mm1\n\t" \
+ "movq "_r7"(%[buf]),%%mm3\n\t" \
+
+/*Performs the second part of the final stage of the Hadamard transform and
+   summing of absolute values.
+  This continues directly from OC_HADAMARD_C_ABS_ACCUM_A_8x4 and must be given
+   the same _r6 slot: mm5 is reloaded from _r6(%[buf]).
+  On exit the accumulated words are in mm0 and mm7={1}x4.*/
+#define OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \
+ "#OC_HADAMARD_C_ABS_ACCUM_B_8x4\n\t" \
+ "paddsw %%mm7,%%mm6\n\t" \
+ "movq "_r6"(%[buf]),%%mm5\n\t" \
+ "paddsw %%mm7,%%mm1\n\t" \
+ "psubw %%mm6,%%mm2\n\t" \
+ "psubw %%mm1,%%mm4\n\t" \
+ /*mm7={1}x4 (needed for the horizontal add that follows) \
+   mm0+=mm2+mm4+max(abs(mm3),abs(mm5))-0x7FFF*/ \
+ "movq %%mm3,%%mm6\n\t" \
+ "pmaxsw %%mm5,%%mm3\n\t" \
+ "paddw %%mm2,%%mm0\n\t" \
+ "paddw %%mm5,%%mm6\n\t" \
+ "paddw %%mm4,%%mm0\n\t" \
+ "paddsw %%mm7,%%mm6\n\t" \
+ "paddw %%mm3,%%mm0\n\t" \
+ "psrlw $14,%%mm7\n\t" \
+ "psubw %%mm6,%%mm0\n\t" \
+
+/*Performs the last stage of an 8-point 1-D Hadamard transform, takes the
+   absolute value of each component, and accumulates everything into mm0.
+  This is the only portion of SATD which requires MMXEXT (we could use plain
+   MMX, but it takes 4 instructions and an extra register to work around the
+   lack of a pmaxsw, which is a pretty serious penalty).
+  _r6 and _r7 are the %[buf] spill-slot offsets (string literals) passed
+   unchanged to both halves, A and B.*/
+#define OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) \
+ OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \
+ OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \
+
+/*Performs an 8-point 1-D Hadamard transform, takes the absolute value of each
+   component, and accumulates everything into mm0.
+  Note that mm0 will have an extra 4 added to each column, and that after
+   removing this value, the remainder will be half the conventional value.
+  (The extra 4 is the four -0x7FFF biases left by the accumulation, taken
+   modulo 2**16.)
+  _r6 and _r7 are the %[buf] spill-slot offsets (string literals) used by the
+   final stage.*/
+#define OC_HADAMARD_ABS_ACCUM_8x4(_r6,_r7) \
+ OC_HADAMARD_AB_8x4 \
+ OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7)
+
+/*Performs two 4x4 transposes (mostly) in place.
+  On input, {mm0,mm1,mm2,mm3} contains rows {e,f,g,h}, and {mm4,mm5,mm6,mm7}
+   contains rows {a,b,c,d}.
+  On output, {0x40,0x50,0x60,0x70}+_off(%[buf]) contains {e,f,g,h}^T, and
+   {mm4,mm5,mm6,mm7} contains the transposed rows {a,b,c,d}^T.
+  _off is a string literal byte offset added to every %[buf] displacement; the
+   slot at 0x10+_off(%[buf]) is also used to spill mm5 during the first
+   transpose.
+  The stores of the first transpose's results are interleaved with the second
+   transpose.*/
+#define OC_TRANSPOSE_4x4x2(_off) \
+ "#OC_TRANSPOSE_4x4x2\n\t" \
+ /*First 4x4 transpose:*/ \
+ "movq %%mm5,0x10+"_off"(%[buf])\n\t" \
+ /*mm0 = e3 e2 e1 e0 \
+   mm1 = f3 f2 f1 f0 \
+   mm2 = g3 g2 g1 g0 \
+   mm3 = h3 h2 h1 h0*/ \
+ "movq %%mm2,%%mm5\n\t" \
+ "punpcklwd %%mm3,%%mm2\n\t" \
+ "punpckhwd %%mm3,%%mm5\n\t" \
+ "movq %%mm0,%%mm3\n\t" \
+ "punpcklwd %%mm1,%%mm0\n\t" \
+ "punpckhwd %%mm1,%%mm3\n\t" \
+ /*mm0 = f1 e1 f0 e0 \
+   mm3 = f3 e3 f2 e2 \
+   mm2 = h1 g1 h0 g0 \
+   mm5 = h3 g3 h2 g2*/ \
+ "movq %%mm0,%%mm1\n\t" \
+ "punpckldq %%mm2,%%mm0\n\t" \
+ "punpckhdq %%mm2,%%mm1\n\t" \
+ "movq %%mm3,%%mm2\n\t" \
+ "punpckhdq %%mm5,%%mm3\n\t" \
+ "movq %%mm0,0x40+"_off"(%[buf])\n\t" \
+ "punpckldq %%mm5,%%mm2\n\t" \
+ /*mm0 = h0 g0 f0 e0 \
+   mm1 = h1 g1 f1 e1 \
+   mm2 = h2 g2 f2 e2 \
+   mm3 = h3 g3 f3 e3*/ \
+ "movq 0x10+"_off"(%[buf]),%%mm5\n\t" \
+ /*Second 4x4 transpose:*/ \
+ /*mm4 = a3 a2 a1 a0 \
+   mm5 = b3 b2 b1 b0 \
+   mm6 = c3 c2 c1 c0 \
+   mm7 = d3 d2 d1 d0*/ \
+ "movq %%mm6,%%mm0\n\t" \
+ "punpcklwd %%mm7,%%mm6\n\t" \
+ "movq %%mm1,0x50+"_off"(%[buf])\n\t" \
+ "punpckhwd %%mm7,%%mm0\n\t" \
+ "movq %%mm4,%%mm7\n\t" \
+ "punpcklwd %%mm5,%%mm4\n\t" \
+ "movq %%mm2,0x60+"_off"(%[buf])\n\t" \
+ "punpckhwd %%mm5,%%mm7\n\t" \
+ /*mm4 = b1 a1 b0 a0 \
+   mm7 = b3 a3 b2 a2 \
+   mm6 = d1 c1 d0 c0 \
+   mm0 = d3 c3 d2 c2*/ \
+ "movq %%mm4,%%mm5\n\t" \
+ "punpckldq %%mm6,%%mm4\n\t" \
+ "movq %%mm3,0x70+"_off"(%[buf])\n\t" \
+ "punpckhdq %%mm6,%%mm5\n\t" \
+ "movq %%mm7,%%mm6\n\t" \
+ "punpckhdq %%mm0,%%mm7\n\t" \
+ "punpckldq %%mm0,%%mm6\n\t" \
+ /*mm4 = d0 c0 b0 a0 \
+   mm5 = d1 c1 b1 a1 \
+   mm6 = d2 c2 b2 a2 \
+   mm7 = d3 c3 b3 a3*/ \
+
+/*Computes the SATD (sum of absolute values of the 2-D Hadamard transform) of
+   the difference between two 8x8 blocks, with an early exit.
+  _src:         The source block.
+  _src_ystride: The offset between rows of _src, in bytes.
+  _ref:         The reference block.
+  _ref_ystride: The offset between rows of _ref, in bytes.
+  _thresh:      Once the sum over the first half of the coefficients reaches
+                 this value, the second half is skipped.
+  Return: The full SATD, or, if the early exit was taken, the partial sum over
+   the first half of the coefficients (which is at least _thresh).
+  The rows are loaded by OC_LOAD_SUB_8x4, which is defined elsewhere in this
+   file; it presumably loads four rows of _src and _ref and subtracts them.*/
+static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src,
+ int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){
+  OC_ALIGN8(ogg_int16_t  buf[64]);
+  ogg_int16_t *bufp;
+  unsigned     ret;
+  unsigned     ret2;
+  bufp=buf;
+  __asm__ __volatile__(
+    OC_LOAD_SUB_8x4("0x00")
+    OC_HADAMARD_8x4
+    OC_TRANSPOSE_4x4x2("0x00")
+    /*Finish swapping out this 8x4 block to make room for the next one.
+      mm0...mm3 have been swapped out already.*/
+    "movq %%mm4,0x00(%[buf])\n\t"
+    "movq %%mm5,0x10(%[buf])\n\t"
+    "movq %%mm6,0x20(%[buf])\n\t"
+    "movq %%mm7,0x30(%[buf])\n\t"
+    OC_LOAD_SUB_8x4("0x04")
+    OC_HADAMARD_8x4
+    OC_TRANSPOSE_4x4x2("0x08")
+    /*Here the first 4x4 block of output from the last transpose is the second
+       4x4 block of input for the next transform.
+      We have cleverly arranged that it already be in the appropriate place, so
+       we only have to do half the loads.*/
+    "movq 0x10(%[buf]),%%mm1\n\t"
+    "movq 0x20(%[buf]),%%mm2\n\t"
+    "movq 0x30(%[buf]),%%mm3\n\t"
+    "movq 0x00(%[buf]),%%mm0\n\t"
+    OC_HADAMARD_ABS_ACCUM_8x4("0x28","0x38")
+    /*Up to this point, everything fit in 16 bits (8 input + 1 for the
+       difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
+       for the factor of two we dropped + 3 for the vertical accumulation).
+      Now we finally have to promote things to dwords.
+      We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long
+       latency of pmaddwd by starting the next series of loads now.*/
+    "mov %[thresh],%[ret2]\n\t"
+    "pmaddwd %%mm7,%%mm0\n\t"
+    "movq 0x50(%[buf]),%%mm1\n\t"
+    "movq 0x58(%[buf]),%%mm5\n\t"
+    "movq %%mm0,%%mm4\n\t"
+    "movq 0x60(%[buf]),%%mm2\n\t"
+    "punpckhdq %%mm0,%%mm0\n\t"
+    "movq 0x68(%[buf]),%%mm6\n\t"
+    "paddd %%mm0,%%mm4\n\t"
+    "movq 0x70(%[buf]),%%mm3\n\t"
+    "movd %%mm4,%[ret]\n\t"
+    "movq 0x78(%[buf]),%%mm7\n\t"
+    /*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4
+       added to them, and a factor of two removed; correct the final sum here.*/
+    "lea -32(%[ret],%[ret]),%[ret]\n\t"
+    "movq 0x40(%[buf]),%%mm0\n\t"
+    "cmp %[ret2],%[ret]\n\t"
+    "movq 0x48(%[buf]),%%mm4\n\t"
+    /*Early exit: the first half alone already reaches the threshold.*/
+    "jae 1f\n\t"
+    OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78")
+    "pmaddwd %%mm7,%%mm0\n\t"
+    /*There isn't much to stick in here to hide the latency this time, but the
+       alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose
+       latency is even worse.*/
+    "sub $32,%[ret]\n\t"
+    "movq %%mm0,%%mm4\n\t"
+    "punpckhdq %%mm0,%%mm0\n\t"
+    "paddd %%mm0,%%mm4\n\t"
+    "movd %%mm4,%[ret2]\n\t"
+    "lea (%[ret],%[ret2],2),%[ret]\n\t"
+    ".p2align 4,,15\n\t"
+    "1:\n\t"
+    /*Although it looks like we're using 7 registers here, gcc can alias %[ret]
+       and %[ret2] with some of the inputs, since for once we don't write to
+       them until after we're done using everything but %[buf] (which is also
+       listed as an output to ensure gcc _doesn't_ alias them against it).*/
+    /*Note that _src_ystride and _ref_ystride must be given non-overlapping
+       constraints, otherwise if gcc can prove they're equal it will allocate
+       them to the same register (which is bad); _src and _ref face a similar
+       problem, though those are never actually the same.*/
+    :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp)
+    :[src]"r"(_src),[src_ystride]"c"((ptrdiff_t)_src_ystride),
+     [ref]"r"(_ref),[ref_ystride]"d"((ptrdiff_t)_ref_ystride),
+     [thresh]"m"(_thresh)
+    /*We have to use neg, so we actually clobber the condition codes for once
+       (not to mention cmp, sub, and add).
+      We also store to buf[] and load from the source and reference blocks
+       through pointer registers, none of which are listed as memory operands,
+       so the compiler has to be told that memory is accessed.*/
+    :"cc","memory"
+  );
+  return ret;
+}
+
+/*SATD with early exit for a source and a reference block that share a single
+   row stride; simply forwards to the two-stride internal implementation.*/
+unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+  unsigned satd;
+  /*Both blocks use _ystride as their row stride.*/
+  satd=oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh);
+  return satd;
+}
+
+/*Our internal implementation of frag_copy2 takes an extra stride parameter so
+   we can share code with oc_enc_frag_satd2_thresh_mmxext().
+  Writes 8 rows of 8 bytes to _dst, each byte being the average of the
+   corresponding bytes of _src1 and _src2.
+  pavgb rounds up, so (a^b)&1 is subtracted from its result to remove the
+   rounding bias.
+  _dst:         The destination block.
+  _dst_ystride: The offset between rows of _dst, in bytes.
+  _src1:        The first source block.
+  _src2:        The second source block.
+  _src_ystride: The offset between rows of both source blocks, in bytes.*/
+static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){
+  __asm__ __volatile__(
+    /*Load the first 3 rows.*/
+    "movq (%[src1]),%%mm0\n\t"
+    "movq (%[src2]),%%mm1\n\t"
+    "movq (%[src1],%[src_ystride]),%%mm2\n\t"
+    "lea (%[src1],%[src_ystride],2),%[src1]\n\t"
+    "movq (%[src2],%[src_ystride]),%%mm3\n\t"
+    "lea (%[src2],%[src_ystride],2),%[src2]\n\t"
+    "pxor %%mm7,%%mm7\n\t"
+    "movq (%[src1]),%%mm4\n\t"
+    "pcmpeqb %%mm6,%%mm6\n\t"
+    "movq (%[src2]),%%mm5\n\t"
+    /*mm7={1}x8.*/
+    "psubb %%mm6,%%mm7\n\t"
+    /*Start averaging %%mm0 and %%mm1 into %%mm6.*/
+    "movq %%mm0,%%mm6\n\t"
+    "pxor %%mm1,%%mm0\n\t"
+    "pavgb %%mm1,%%mm6\n\t"
+    /*%%mm1 is free, start averaging %%mm3 into %%mm2 using %%mm1.*/
+    "movq %%mm2,%%mm1\n\t"
+    "pand %%mm7,%%mm0\n\t"
+    "pavgb %%mm3,%%mm2\n\t"
+    "pxor %%mm3,%%mm1\n\t"
+    /*%%mm3 is free.*/
+    "psubb %%mm0,%%mm6\n\t"
+    /*%%mm0 is free, start loading the next row.*/
+    "movq (%[src1],%[src_ystride]),%%mm0\n\t"
+    /*Start averaging %%mm5 and %%mm4 using %%mm3.*/
+    "movq %%mm4,%%mm3\n\t"
+    /*%%mm6 (row 0) is done; write it out.*/
+    "movq %%mm6,(%[dst])\n\t"
+    "pand %%mm7,%%mm1\n\t"
+    "pavgb %%mm5,%%mm4\n\t"
+    "psubb %%mm1,%%mm2\n\t"
+    /*%%mm1 is free, continue loading the next row.*/
+    "movq (%[src2],%[src_ystride]),%%mm1\n\t"
+    "pxor %%mm5,%%mm3\n\t"
+    "lea (%[src1],%[src_ystride],2),%[src1]\n\t"
+    /*%%mm2 (row 1) is done; write it out.*/
+    "movq %%mm2,(%[dst],%[dst_ystride])\n\t"
+    "pand %%mm7,%%mm3\n\t"
+    /*Start loading the next row.*/
+    "movq (%[src1]),%%mm2\n\t"
+    "lea (%[dst],%[dst_ystride],2),%[dst]\n\t"
+    "psubb %%mm3,%%mm4\n\t"
+    "lea (%[src2],%[src_ystride],2),%[src2]\n\t"
+    /*%%mm4 (row 2) is done; write it out.*/
+    "movq %%mm4,(%[dst])\n\t"
+    /*Continue loading the next row.*/
+    "movq (%[src2]),%%mm3\n\t"
+    /*Start averaging %%mm0 and %%mm1 into %%mm6.*/
+    "movq %%mm0,%%mm6\n\t"
+    "pxor %%mm1,%%mm0\n\t"
+    /*Start loading the next row.*/
+    "movq (%[src1],%[src_ystride]),%%mm4\n\t"
+    "pavgb %%mm1,%%mm6\n\t"
+    /*%%mm1 is free; start averaging %%mm3 into %%mm2 using %%mm1.*/
+    "movq %%mm2,%%mm1\n\t"
+    "pand %%mm7,%%mm0\n\t"
+    /*Continue loading the next row.*/
+    "movq (%[src2],%[src_ystride]),%%mm5\n\t"
+    "pavgb %%mm3,%%mm2\n\t"
+    "lea (%[src1],%[src_ystride],2),%[src1]\n\t"
+    "pxor %%mm3,%%mm1\n\t"
+    /*%%mm3 is free.*/
+    "psubb %%mm0,%%mm6\n\t"
+    /*%%mm0 is free, start loading the next row.*/
+    "movq (%[src1]),%%mm0\n\t"
+    /*Start averaging %%mm5 into %%mm4 using %%mm3.*/
+    "movq %%mm4,%%mm3\n\t"
+    /*%%mm6 (row 3) is done; write it out.*/
+    "movq %%mm6,(%[dst],%[dst_ystride])\n\t"
+    "pand %%mm7,%%mm1\n\t"
+    "lea (%[src2],%[src_ystride],2),%[src2]\n\t"
+    "pavgb %%mm5,%%mm4\n\t"
+    "lea (%[dst],%[dst_ystride],2),%[dst]\n\t"
+    "psubb %%mm1,%%mm2\n\t"
+    /*%%mm1 is free; continue loading the next row.*/
+    "movq (%[src2]),%%mm1\n\t"
+    "pxor %%mm5,%%mm3\n\t"
+    /*%%mm2 (row 4) is done; write it out.*/
+    "movq %%mm2,(%[dst])\n\t"
+    "pand %%mm7,%%mm3\n\t"
+    /*Start loading the next row.*/
+    "movq (%[src1],%[src_ystride]),%%mm2\n\t"
+    "psubb %%mm3,%%mm4\n\t"
+    /*Start averaging %%mm0 and %%mm1 into %%mm6.*/
+    "movq %%mm0,%%mm6\n\t"
+    /*Continue loading the next row.*/
+    "movq (%[src2],%[src_ystride]),%%mm3\n\t"
+    /*%%mm4 (row 5) is done; write it out.*/
+    "movq %%mm4,(%[dst],%[dst_ystride])\n\t"
+    "pxor %%mm1,%%mm0\n\t"
+    "pavgb %%mm1,%%mm6\n\t"
+    /*%%mm4 is free; start averaging %%mm3 into %%mm2 using %%mm4.*/
+    "movq %%mm2,%%mm4\n\t"
+    "pand %%mm7,%%mm0\n\t"
+    "pavgb %%mm3,%%mm2\n\t"
+    "pxor %%mm3,%%mm4\n\t"
+    "lea (%[dst],%[dst_ystride],2),%[dst]\n\t"
+    "psubb %%mm0,%%mm6\n\t"
+    "pand %%mm7,%%mm4\n\t"
+    /*%%mm6 (row 6) is done, write it out.*/
+    "movq %%mm6,(%[dst])\n\t"
+    "psubb %%mm4,%%mm2\n\t"
+    /*%%mm2 (row 7) is done, write it out.*/
+    "movq %%mm2,(%[dst],%[dst_ystride])\n\t"
+    /*All three pointers are advanced by the code above, so they are
+       read-write operands.
+      No '%' (commutative) modifier is used on src1/src2: gcc only permits it
+       on read-only operands, and it gains nothing when both operands share
+       the same "r" constraint.*/
+    :[dst]"+r"(_dst),[src1]"+r"(_src1),[src2]"+r"(_src2)
+    :[dst_ystride]"r"((ptrdiff_t)_dst_ystride),
+     [src_ystride]"r"((ptrdiff_t)_src_ystride)
+    :"memory"
+  );
+}
+
+/*SATD with early exit against the average of two reference blocks.
+  The two references are first averaged into a local 8x8 block with a row
+   stride of 8, which then serves as the reference for the ordinary
+   thresholded SATD.*/
+unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){
+  OC_ALIGN8(unsigned char avg[64]);
+  unsigned satd;
+  /*Build the averaged reference, packed with a stride of 8.*/
+  oc_int_frag_copy2_mmxext(avg,8,_ref1,_ref2,_ystride);
+  /*The source keeps its own stride; the averaged reference uses 8.*/
+  satd=oc_int_frag_satd_thresh_mmxext(_src,_ystride,avg,8,_thresh);
+  return satd;
+}
+
+/*Computes the SATD (sum of absolute values of the 2-D Hadamard transform) of
+   a single 8x8 block, with the DC coefficient removed from the total.
+  _src:     The source block.
+  _ystride: The offset between rows of _src, in bytes.
+  Return: The sum of the absolute values of all coefficients minus the DC
+   coefficient.
+  The rows are loaded by OC_LOAD_8x4, which is defined elsewhere in this
+   file.*/
+unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,
+ int _ystride){
+  OC_ALIGN8(ogg_int16_t  buf[64]);
+  ogg_int16_t *bufp;
+  unsigned     ret;
+  unsigned     ret2;
+  bufp=buf;
+  __asm__ __volatile__(
+    OC_LOAD_8x4("0x00")
+    OC_HADAMARD_8x4
+    OC_TRANSPOSE_4x4x2("0x00")
+    /*Finish swapping out this 8x4 block to make room for the next one.
+      mm0...mm3 have been swapped out already.*/
+    "movq %%mm4,0x00(%[buf])\n\t"
+    "movq %%mm5,0x10(%[buf])\n\t"
+    "movq %%mm6,0x20(%[buf])\n\t"
+    "movq %%mm7,0x30(%[buf])\n\t"
+    OC_LOAD_8x4("0x04")
+    OC_HADAMARD_8x4
+    OC_TRANSPOSE_4x4x2("0x08")
+    /*Here the first 4x4 block of output from the last transpose is the second
+       4x4 block of input for the next transform.
+      We have cleverly arranged that it already be in the appropriate place, so
+       we only have to do half the loads.*/
+    "movq 0x10(%[buf]),%%mm1\n\t"
+    "movq 0x20(%[buf]),%%mm2\n\t"
+    "movq 0x30(%[buf]),%%mm3\n\t"
+    "movq 0x00(%[buf]),%%mm0\n\t"
+    /*We split out the stages here so we can save the DC coefficient in the
+       middle.*/
+    OC_HADAMARD_AB_8x4
+    OC_HADAMARD_C_ABS_ACCUM_A_8x4("0x28","0x38")
+    /*The low word of mm1 is the DC coefficient; the upper bits copied here are
+       discarded by the movzx below.*/
+    "movd %%mm1,%[ret]\n\t"
+    OC_HADAMARD_C_ABS_ACCUM_B_8x4("0x28","0x38")
+    /*Up to this point, everything fit in 16 bits (8 input + 1 for the
+       difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
+       for the factor of two we dropped + 3 for the vertical accumulation).
+      Now we finally have to promote things to dwords.
+      We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long
+       latency of pmaddwd by starting the next series of loads now.*/
+    "pmaddwd %%mm7,%%mm0\n\t"
+    "movq 0x50(%[buf]),%%mm1\n\t"
+    "movq 0x58(%[buf]),%%mm5\n\t"
+    "movq 0x60(%[buf]),%%mm2\n\t"
+    "movq %%mm0,%%mm4\n\t"
+    "movq 0x68(%[buf]),%%mm6\n\t"
+    "punpckhdq %%mm0,%%mm0\n\t"
+    "movq 0x70(%[buf]),%%mm3\n\t"
+    "paddd %%mm0,%%mm4\n\t"
+    "movq 0x78(%[buf]),%%mm7\n\t"
+    "movd %%mm4,%[ret2]\n\t"
+    "movq 0x40(%[buf]),%%mm0\n\t"
+    "movq 0x48(%[buf]),%%mm4\n\t"
+    OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78")
+    "pmaddwd %%mm7,%%mm0\n\t"
+    /*We assume that the DC coefficient is always positive (which is true,
+       because the input to the INTRA transform was not a difference).*/
+    "movzx %w[ret],%[ret]\n\t"
+    /*ret2=2*(first half sum)-DC.*/
+    "add %[ret2],%[ret2]\n\t"
+    "sub %[ret],%[ret2]\n\t"
+    "movq %%mm0,%%mm4\n\t"
+    "punpckhdq %%mm0,%%mm0\n\t"
+    "paddd %%mm0,%%mm4\n\t"
+    "movd %%mm4,%[ret]\n\t"
+    /*Add twice the second half sum and remove the two biases of 32 left by the
+       accumulation.*/
+    "lea -64(%[ret2],%[ret],2),%[ret]\n\t"
+    /*Although it looks like we're using 7 registers here, gcc can alias %[ret]
+       and %[ret2] with some of the inputs, since for once we don't write to
+       them until after we're done using everything but %[buf] (which is also
+       listed as an output to ensure gcc _doesn't_ alias them against it).*/
+    :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp)
+    :[src]"r"(_src),[src4]"r"(_src+4*_ystride),
+     [ystride]"r"((ptrdiff_t)_ystride),[ystride3]"r"((ptrdiff_t)3*_ystride)
+    /*We have to use sub, so we actually clobber the condition codes for once
+       (not to mention add).
+      We also store to buf[] and load from the source block through pointer
+       registers, none of which are listed as memory operands, so the compiler
+       has to be told that memory is accessed.*/
+    :"cc","memory"
+  );
+  return ret;
+}
+
+/*Computes the 16-bit difference between two 8x8 blocks of 8-bit pixels.
+  _residue: The 64 differences _src-_ref, stored row by row.
+  _src:     The source block.
+  _ref:     The reference block.
+  _ystride: The offset between rows of both blocks, in bytes.
+  Each loop iteration handles two rows.*/
+void oc_enc_frag_sub_mmx(ogg_int16_t _residue[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+  int i;
+  for(i=4;i-->0;){
+    __asm__ __volatile__(
+      /*mm7=0, used to zero-extend bytes to words.
+        This is set up inside the same asm statement that uses it: the compiler
+         makes no promise that an MMX register keeps its value between two
+         separate asm statements.*/
+      "pxor %%mm7,%%mm7\n\t"
+      /*mm0=[src]*/
+      "movq (%[src]),%%mm0\n\t"
+      /*mm1=[ref]*/
+      "movq (%[ref]),%%mm1\n\t"
+      /*mm4=[src+ystride]*/
+      "movq (%[src],%[ystride]),%%mm4\n\t"
+      /*mm5=[ref+ystride]*/
+      "movq (%[ref],%[ystride]),%%mm5\n\t"
+      /*Compute [src]-[ref].*/
+      "movq %%mm0,%%mm2\n\t"
+      "punpcklbw %%mm7,%%mm0\n\t"
+      "movq %%mm1,%%mm3\n\t"
+      "punpckhbw %%mm7,%%mm2\n\t"
+      "punpcklbw %%mm7,%%mm1\n\t"
+      "punpckhbw %%mm7,%%mm3\n\t"
+      "psubw %%mm1,%%mm0\n\t"
+      "psubw %%mm3,%%mm2\n\t"
+      /*Compute [src+ystride]-[ref+ystride].*/
+      "movq %%mm4,%%mm1\n\t"
+      "punpcklbw %%mm7,%%mm4\n\t"
+      "movq %%mm5,%%mm3\n\t"
+      "punpckhbw %%mm7,%%mm1\n\t"
+      "lea (%[src],%[ystride],2),%[src]\n\t"
+      "punpcklbw %%mm7,%%mm5\n\t"
+      "lea (%[ref],%[ystride],2),%[ref]\n\t"
+      "punpckhbw %%mm7,%%mm3\n\t"
+      "psubw %%mm5,%%mm4\n\t"
+      "psubw %%mm3,%%mm1\n\t"
+      /*Write the answer out.*/
+      "movq %%mm0,0x00(%[residue])\n\t"
+      "movq %%mm2,0x08(%[residue])\n\t"
+      "movq %%mm4,0x10(%[residue])\n\t"
+      "movq %%mm1,0x18(%[residue])\n\t"
+      /*Advance to the next two rows of output.*/
+      "lea 0x20(%[residue]),%[residue]\n\t"
+      :[residue]"+r"(_residue),[src]"+r"(_src),[ref]"+r"(_ref)
+      :[ystride]"r"((ptrdiff_t)_ystride)
+      :"memory"
+    );
+  }
+}
+
+/*Subtracts 128 from every pixel of an 8x8 block of 8-bit pixels and stores
+   the results as 16-bit values.
+  _residue: The 64 values _src-128, stored row by row.
+  _src:     The source block.
+  _ystride: The offset between rows of _src, in bytes.
+  The loads for the next pair of rows are interleaved with the arithmetic for
+   the current pair.*/
+void oc_enc_frag_sub_128_mmx(ogg_int16_t _residue[64],
+ const unsigned char *_src,int _ystride){
+  ptrdiff_t ystride3;
+  __asm__ __volatile__(
+    /*mm0=[src]*/
+    "movq (%[src]),%%mm0\n\t"
+    /*mm1=[src+ystride]*/
+    "movq (%[src],%[ystride]),%%mm1\n\t"
+    /*mm6={-1}x4*/
+    "pcmpeqw %%mm6,%%mm6\n\t"
+    /*mm2=[src+2*ystride]*/
+    "movq (%[src],%[ystride],2),%%mm2\n\t"
+    /*[ystride3]=3*[ystride]*/
+    "lea (%[ystride],%[ystride],2),%[ystride3]\n\t"
+    /*mm6={0x8000}x4*/
+    "psllw $15,%%mm6\n\t"
+    /*mm3=[src+3*ystride]*/
+    "movq (%[src],%[ystride3]),%%mm3\n\t"
+    /*mm6={128}x4*/
+    "psrlw $8,%%mm6\n\t"
+    /*mm7=0*/
+    "pxor %%mm7,%%mm7\n\t"
+    /*[src]=[src]+4*[ystride]*/
+    "lea (%[src],%[ystride],4),%[src]\n\t"
+    /*Compute [src]-128 and [src+ystride]-128*/
+    "movq %%mm0,%%mm4\n\t"
+    "punpcklbw %%mm7,%%mm0\n\t"
+    "movq %%mm1,%%mm5\n\t"
+    "punpckhbw %%mm7,%%mm4\n\t"
+    "psubw %%mm6,%%mm0\n\t"
+    "punpcklbw %%mm7,%%mm1\n\t"
+    "psubw %%mm6,%%mm4\n\t"
+    "punpckhbw %%mm7,%%mm5\n\t"
+    "psubw %%mm6,%%mm1\n\t"
+    "psubw %%mm6,%%mm5\n\t"
+    /*Write the answer out.*/
+    "movq %%mm0,0x00(%[residue])\n\t"
+    "movq %%mm4,0x08(%[residue])\n\t"
+    "movq %%mm1,0x10(%[residue])\n\t"
+    "movq %%mm5,0x18(%[residue])\n\t"
+    /*mm0=[src+4*ystride]*/
+    "movq (%[src]),%%mm0\n\t"
+    /*mm1=[src+5*ystride]*/
+    "movq (%[src],%[ystride]),%%mm1\n\t"
+    /*Compute [src+2*ystride]-128 and [src+3*ystride]-128*/
+    "movq %%mm2,%%mm4\n\t"
+    "punpcklbw %%mm7,%%mm2\n\t"
+    "movq %%mm3,%%mm5\n\t"
+    "punpckhbw %%mm7,%%mm4\n\t"
+    "psubw %%mm6,%%mm2\n\t"
+    "punpcklbw %%mm7,%%mm3\n\t"
+    "psubw %%mm6,%%mm4\n\t"
+    "punpckhbw %%mm7,%%mm5\n\t"
+    "psubw %%mm6,%%mm3\n\t"
+    "psubw %%mm6,%%mm5\n\t"
+    /*Write the answer out.*/
+    "movq %%mm2,0x20(%[residue])\n\t"
+    "movq %%mm4,0x28(%[residue])\n\t"
+    "movq %%mm3,0x30(%[residue])\n\t"
+    "movq %%mm5,0x38(%[residue])\n\t"
+    /*mm2=[src+6*ystride]*/
+    "movq (%[src],%[ystride],2),%%mm2\n\t"
+    /*mm3=[src+7*ystride]*/
+    "movq (%[src],%[ystride3]),%%mm3\n\t"
+    /*Compute [src+4*ystride]-128 and [src+5*ystride]-128*/
+    "movq %%mm0,%%mm4\n\t"
+    "punpcklbw %%mm7,%%mm0\n\t"
+    "movq %%mm1,%%mm5\n\t"
+    "punpckhbw %%mm7,%%mm4\n\t"
+    "psubw %%mm6,%%mm0\n\t"
+    "punpcklbw %%mm7,%%mm1\n\t"
+    "psubw %%mm6,%%mm4\n\t"
+    "punpckhbw %%mm7,%%mm5\n\t"
+    "psubw %%mm6,%%mm1\n\t"
+    "psubw %%mm6,%%mm5\n\t"
+    /*Write the answer out.*/
+    "movq %%mm0,0x40(%[residue])\n\t"
+    "movq %%mm4,0x48(%[residue])\n\t"
+    "movq %%mm1,0x50(%[residue])\n\t"
+    "movq %%mm5,0x58(%[residue])\n\t"
+    /*Compute [src+6*ystride]-128 and [src+7*ystride]-128*/
+    "movq %%mm2,%%mm4\n\t"
+    "punpcklbw %%mm7,%%mm2\n\t"
+    "movq %%mm3,%%mm5\n\t"
+    "punpckhbw %%mm7,%%mm4\n\t"
+    "psubw %%mm6,%%mm2\n\t"
+    "punpcklbw %%mm7,%%mm3\n\t"
+    "psubw %%mm6,%%mm4\n\t"
+    "punpckhbw %%mm7,%%mm5\n\t"
+    "psubw %%mm6,%%mm3\n\t"
+    "psubw %%mm6,%%mm5\n\t"
+    /*Write the answer out.*/
+    "movq %%mm2,0x60(%[residue])\n\t"
+    "movq %%mm4,0x68(%[residue])\n\t"
+    "movq %%mm3,0x70(%[residue])\n\t"
+    "movq %%mm5,0x78(%[residue])\n\t"
+    /*ystride3 is written before the last use of the inputs, so it is marked
+       early-clobber to keep it from sharing a register with any of them.*/
+    :[src]"+r"(_src),[ystride3]"=&r"(ystride3)
+    :[residue]"r"(_residue),[ystride]"r"((ptrdiff_t)_ystride)
+    :"memory"
+  );
+}
+
+/*Averages two source blocks into _dst, where the destination and both sources
+   all share the single row stride _ystride.
+  Forwards to the internal implementation, which takes separate destination
+   and source strides.*/
+void oc_enc_frag_copy2_mmxext(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride){
+  oc_int_frag_copy2_mmxext(_dst,_ystride,_src1,_src2,_ystride);
+}
+
+#endif

Copied: trunk/theora/lib/x86/mmxfdct.c (from rev 16442, trunk/theora/lib/enc/x86/mmxfdct.c)
===================================================================
--- trunk/theora/lib/x86/mmxfdct.c	                        (rev 0)
+++ trunk/theora/lib/x86/mmxfdct.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,665 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************/
+/*MMX fDCT implementation for x86_32*/
+/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/
+#include "x86enc.h"
+
+#if defined(OC_X86_ASM)
+
+/*Performs the first butterfly stage of the forward DCT on four columns at
+   once.
+  On input, mm0...mm7 contain t0...t7.
+  On output, mm0=t7', mm1=t6', mm2=t5', mm3=t4', mm4=t3', mm5=t2', mm6=t1', and
+   mm7=t0'.
+  Each sum is formed without a scratch register by doubling the subtrahend and
+   adding the difference back to it.*/
+# define OC_FDCT_STAGE1_8x4 \
+ "#OC_FDCT_STAGE1_8x4\n\t" \
+ /*Stage 1:*/ \
+ /*mm0=t7'=t0-t7*/ \
+ "psubw %%mm7,%%mm0\n\t" \
+ "paddw %%mm7,%%mm7\n\t" \
+ /*mm1=t6'=t1-t6*/ \
+ "psubw %%mm6,%%mm1\n\t" \
+ "paddw %%mm6,%%mm6\n\t" \
+ /*mm2=t5'=t2-t5*/ \
+ "psubw %%mm5,%%mm2\n\t" \
+ "paddw %%mm5,%%mm5\n\t" \
+ /*mm3=t4'=t3-t4*/ \
+ "psubw %%mm4,%%mm3\n\t" \
+ "paddw %%mm4,%%mm4\n\t" \
+ /*mm7=t0'=t0+t7*/ \
+ "paddw %%mm0,%%mm7\n\t" \
+ /*mm6=t1'=t1+t6*/ \
+ "paddw %%mm1,%%mm6\n\t" \
+ /*mm5=t2'=t2+t5*/ \
+ "paddw %%mm2,%%mm5\n\t" \
+ /*mm4=t3'=t3+t4*/ \
+ "paddw %%mm3,%%mm4\n\t" \
+
+/*Performs stages 2 through 4 of the forward DCT on four columns at once.
+  On input, the registers are as left by OC_FDCT_STAGE1_8x4: mm0=t7', mm1=t6',
+   mm2=t5', mm3=t4', mm4=t3', mm5=t2', mm6=t1', and mm7=t0'.
+  _r0..._r7 are string literals giving the byte offsets from %[y] of eight
+   8-byte slots; all eight are written as scratch space along the way.
+  %[a] is a scratch general-purpose register used to load constants.
+  On output, mm4=_y[0], mm6=_y[2], mm0=_y[4], mm5=_y[5], mm3=_y[6], and
+   mm1=_y[7]; _y[1] and _y[3] are only available in memory, at _r1(%[y]) and
+   _r3(%[y]).
+  The comment at the head of each group of instructions describes the register
+   contents once that group has executed.*/
+# define OC_FDCT8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) \
+ "#OC_FDCT8x4\n\t" \
+ /*Stage 2:*/ \
+ /*mm7=t3''=t0'-t3'*/ \
+ "psubw %%mm4,%%mm7\n\t" \
+ "paddw %%mm4,%%mm4\n\t" \
+ /*mm6=t2''=t1'-t2'*/ \
+ "psubw %%mm5,%%mm6\n\t" \
+ "movq %%mm7,"_r6"(%[y])\n\t" \
+ "paddw %%mm5,%%mm5\n\t" \
+ /*mm1=t5''=t6'-t5'*/ \
+ "psubw %%mm2,%%mm1\n\t" \
+ "movq %%mm6,"_r2"(%[y])\n\t" \
+ /*mm4=t0''=t0'+t3'*/ \
+ "paddw %%mm7,%%mm4\n\t" \
+ "paddw %%mm2,%%mm2\n\t" \
+ /*mm5=t1''=t1'+t2'*/ \
+ "movq %%mm4,"_r0"(%[y])\n\t" \
+ "paddw %%mm6,%%mm5\n\t" \
+ /*mm2=t6''=t6'+t5'*/ \
+ "paddw %%mm1,%%mm2\n\t" \
+ "movq %%mm5,"_r4"(%[y])\n\t" \
+ /*mm0=t7', mm1=t5'', mm2=t6'', mm3=t4'.*/ \
+ /*mm4, mm5, mm6, mm7 are free.*/ \
+ /*Stage 3:*/ \
+ /*mm6={2}x4, mm7={27146,0xB500>>1}x2*/ \
+ "mov $0x5A806A0A,%[a]\n\t" \
+ "pcmpeqb %%mm6,%%mm6\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ "psrlw $15,%%mm6\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ "paddw %%mm6,%%mm6\n\t" \
+ /*mm0=0, mm2={-1}x4 \
+   mm5:mm4=t5''*27146+0xB500*/ \
+ "movq %%mm1,%%mm4\n\t" \
+ "movq %%mm1,%%mm5\n\t" \
+ "punpcklwd %%mm6,%%mm4\n\t" \
+ "movq %%mm2,"_r3"(%[y])\n\t" \
+ "pmaddwd %%mm7,%%mm4\n\t" \
+ "movq %%mm0,"_r7"(%[y])\n\t" \
+ "punpckhwd %%mm6,%%mm5\n\t" \
+ "pxor %%mm0,%%mm0\n\t" \
+ "pmaddwd %%mm7,%%mm5\n\t" \
+ "pcmpeqb %%mm2,%%mm2\n\t" \
+ /*mm2=t6'', mm1=t5''+(t5''!=0) \
+   mm4=(t5''*27146+0xB500>>16)*/ \
+ "pcmpeqw %%mm1,%%mm0\n\t" \
+ "psrad $16,%%mm4\n\t" \
+ "psubw %%mm2,%%mm0\n\t" \
+ "movq "_r3"(%[y]),%%mm2\n\t" \
+ "psrad $16,%%mm5\n\t" \
+ "paddw %%mm0,%%mm1\n\t" \
+ "packssdw %%mm5,%%mm4\n\t" \
+ /*mm4=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \
+ "paddw %%mm1,%%mm4\n\t" \
+ "movq "_r7"(%[y]),%%mm0\n\t" \
+ "psraw $1,%%mm4\n\t" \
+ "movq %%mm3,%%mm1\n\t" \
+ /*mm3=t4''=t4'+s*/ \
+ "paddw %%mm4,%%mm3\n\t" \
+ /*mm1=t5'''=t4'-s*/ \
+ "psubw %%mm4,%%mm1\n\t" \
+ /*mm1=0, mm3={-1}x4 \
+   mm5:mm4=t6''*27146+0xB500*/ \
+ "movq %%mm2,%%mm4\n\t" \
+ "movq %%mm2,%%mm5\n\t" \
+ "punpcklwd %%mm6,%%mm4\n\t" \
+ "movq %%mm1,"_r5"(%[y])\n\t" \
+ "pmaddwd %%mm7,%%mm4\n\t" \
+ "movq %%mm3,"_r1"(%[y])\n\t" \
+ "punpckhwd %%mm6,%%mm5\n\t" \
+ "pxor %%mm1,%%mm1\n\t" \
+ "pmaddwd %%mm7,%%mm5\n\t" \
+ "pcmpeqb %%mm3,%%mm3\n\t" \
+ /*mm2=t6''+(t6''!=0), mm4=(t6''*27146+0xB500>>16)*/ \
+ "psrad $16,%%mm4\n\t" \
+ "pcmpeqw %%mm2,%%mm1\n\t" \
+ "psrad $16,%%mm5\n\t" \
+ "psubw %%mm3,%%mm1\n\t" \
+ "packssdw %%mm5,%%mm4\n\t" \
+ "paddw %%mm1,%%mm2\n\t" \
+ /*mm1=t1'' \
+   mm4=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \
+ "paddw %%mm2,%%mm4\n\t" \
+ "movq "_r4"(%[y]),%%mm1\n\t" \
+ "psraw $1,%%mm4\n\t" \
+ "movq %%mm0,%%mm2\n\t" \
+ /*mm0=t7''=t7'+s \
+   (mm7 still holds {27146,0xB500>>1}x2 here; it is not reloaded until \
+    Stage 4.)*/ \
+ "paddw %%mm4,%%mm0\n\t" \
+ /*mm2=t6'''=t7'-s*/ \
+ "psubw %%mm4,%%mm2\n\t" \
+ /*Stage 4:*/ \
+ /*mm0=0, mm2=t0'' \
+   mm5:mm4=t1''*27146+0xB500*/ \
+ "movq %%mm1,%%mm4\n\t" \
+ "movq %%mm1,%%mm5\n\t" \
+ "punpcklwd %%mm6,%%mm4\n\t" \
+ "movq %%mm2,"_r3"(%[y])\n\t" \
+ "pmaddwd %%mm7,%%mm4\n\t" \
+ "movq "_r0"(%[y]),%%mm2\n\t" \
+ "punpckhwd %%mm6,%%mm5\n\t" \
+ "movq %%mm0,"_r7"(%[y])\n\t" \
+ "pmaddwd %%mm7,%%mm5\n\t" \
+ "pxor %%mm0,%%mm0\n\t" \
+ /*mm7={27146,0x4000>>1}x2 \
+   mm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \
+ "psrad $16,%%mm4\n\t" \
+ "mov $0x20006A0A,%[a]\n\t" \
+ "pcmpeqw %%mm1,%%mm0\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ "psrad $16,%%mm5\n\t" \
+ "psubw %%mm3,%%mm0\n\t" \
+ "packssdw %%mm5,%%mm4\n\t" \
+ "paddw %%mm1,%%mm0\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ "paddw %%mm4,%%mm0\n\t" \
+ /*mm6={0x00000E3D}x2 \
+   mm1=-(t0''==0), mm5:mm4=t0''*27146+0x4000*/ \
+ "movq %%mm2,%%mm4\n\t" \
+ "movq %%mm2,%%mm5\n\t" \
+ "punpcklwd %%mm6,%%mm4\n\t" \
+ "mov $0x0E3D,%[a]\n\t" \
+ "pmaddwd %%mm7,%%mm4\n\t" \
+ "punpckhwd %%mm6,%%mm5\n\t" \
+ "movd %[a],%%mm6\n\t" \
+ "pmaddwd %%mm7,%%mm5\n\t" \
+ "pxor %%mm1,%%mm1\n\t" \
+ "punpckldq %%mm6,%%mm6\n\t" \
+ "pcmpeqw %%mm2,%%mm1\n\t" \
+ /*mm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \
+ "psrad $16,%%mm4\n\t" \
+ "psubw %%mm3,%%mm1\n\t" \
+ "psrad $16,%%mm5\n\t" \
+ "paddw %%mm1,%%mm2\n\t" \
+ "packssdw %%mm5,%%mm4\n\t" \
+ "movq "_r5"(%[y]),%%mm1\n\t" \
+ "paddw %%mm2,%%mm4\n\t" \
+ /*mm2=t6'', mm0=_y[0]=u=r+s>>1 \
+   The naive implementation could cause overflow, so we use \
+    u=(r&s)+((r^s)>>1).*/ \
+ "movq "_r3"(%[y]),%%mm2\n\t" \
+ "movq %%mm0,%%mm7\n\t" \
+ "pxor %%mm4,%%mm0\n\t" \
+ "pand %%mm4,%%mm7\n\t" \
+ "psraw $1,%%mm0\n\t" \
+ "mov $0x7FFF54DC,%[a]\n\t" \
+ "paddw %%mm7,%%mm0\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ /*mm7={54491-0x7FFF,0x7FFF}x2 \
+   mm4=_y[4]=v=r-u*/ \
+ "psubw %%mm0,%%mm4\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ "movq %%mm4,"_r4"(%[y])\n\t" \
+ /*mm0=0, mm7={36410}x4 \
+   mm1=(t5'''!=0), mm5:mm4=54491*t5'''+0x0E3D*/ \
+ "movq %%mm1,%%mm4\n\t" \
+ "movq %%mm1,%%mm5\n\t" \
+ "punpcklwd %%mm1,%%mm4\n\t" \
+ "mov $0x8E3A8E3A,%[a]\n\t" \
+ "pmaddwd %%mm7,%%mm4\n\t" \
+ "movq %%mm0,"_r0"(%[y])\n\t" \
+ "punpckhwd %%mm1,%%mm5\n\t" \
+ "pxor %%mm0,%%mm0\n\t" \
+ "pmaddwd %%mm7,%%mm5\n\t" \
+ "pcmpeqw %%mm0,%%mm1\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ "psubw %%mm3,%%mm1\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ "paddd %%mm6,%%mm4\n\t" \
+ "paddd %%mm6,%%mm5\n\t" \
+ /*mm0=0 \
+   mm3:mm1=36410*t6'''+((t5'''!=0)<<16)*/ \
+ "movq %%mm2,%%mm6\n\t" \
+ "movq %%mm2,%%mm3\n\t" \
+ "pmulhw %%mm7,%%mm6\n\t" \
+ "paddw %%mm2,%%mm1\n\t" \
+ "pmullw %%mm7,%%mm3\n\t" \
+ "pxor %%mm0,%%mm0\n\t" \
+ "paddw %%mm1,%%mm6\n\t" \
+ "movq %%mm3,%%mm1\n\t" \
+ "punpckhwd %%mm6,%%mm3\n\t" \
+ "punpcklwd %%mm6,%%mm1\n\t" \
+ /*mm3={-1}x4, mm6={1}x4 \
+   mm4=_y[5]=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \
+ "paddd %%mm3,%%mm5\n\t" \
+ "paddd %%mm1,%%mm4\n\t" \
+ "psrad $16,%%mm5\n\t" \
+ "pxor %%mm6,%%mm6\n\t" \
+ "psrad $16,%%mm4\n\t" \
+ "pcmpeqb %%mm3,%%mm3\n\t" \
+ "packssdw %%mm5,%%mm4\n\t" \
+ "psubw %%mm3,%%mm6\n\t" \
+ /*mm1=t7'', mm7={26568,0x3400}x2 \
+   mm2=s=t6'''-(36410*u>>16)*/ \
+ "movq %%mm4,%%mm1\n\t" \
+ "mov $0x340067C8,%[a]\n\t" \
+ "pmulhw %%mm7,%%mm4\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ "movq %%mm1,"_r5"(%[y])\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ "paddw %%mm1,%%mm4\n\t" \
+ "movq "_r7"(%[y]),%%mm1\n\t" \
+ "psubw %%mm4,%%mm2\n\t" \
+ /*mm6={0x00007B1B}x2 \
+   mm0=(s!=0), mm5:mm4=s*26568+0x3400*/ \
+ "movq %%mm2,%%mm4\n\t" \
+ "movq %%mm2,%%mm5\n\t" \
+ "punpcklwd %%mm6,%%mm4\n\t" \
+ "pcmpeqw %%mm2,%%mm0\n\t" \
+ "pmaddwd %%mm7,%%mm4\n\t" \
+ "mov $0x7B1B,%[a]\n\t" \
+ "punpckhwd %%mm6,%%mm5\n\t" \
+ "movd %[a],%%mm6\n\t" \
+ "pmaddwd %%mm7,%%mm5\n\t" \
+ "psubw %%mm3,%%mm0\n\t" \
+ "punpckldq %%mm6,%%mm6\n\t" \
+ /*mm7={64277-0x7FFF,0x7FFF}x2 \
+   mm2=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \
+ "psrad $17,%%mm4\n\t" \
+ "paddw %%mm0,%%mm2\n\t" \
+ "psrad $17,%%mm5\n\t" \
+ "mov $0x7FFF7B16,%[a]\n\t" \
+ "packssdw %%mm5,%%mm4\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ "paddw %%mm4,%%mm2\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ /*mm0=0, mm7={12785}x4 \
+   mm1=(t7''!=0), mm2=t4'', mm5:mm4=64277*t7''+0x7B1B*/ \
+ "movq %%mm1,%%mm4\n\t" \
+ "movq %%mm1,%%mm5\n\t" \
+ "movq %%mm2,"_r3"(%[y])\n\t" \
+ "punpcklwd %%mm1,%%mm4\n\t" \
+ "movq "_r1"(%[y]),%%mm2\n\t" \
+ "pmaddwd %%mm7,%%mm4\n\t" \
+ "mov $0x31F131F1,%[a]\n\t" \
+ "punpckhwd %%mm1,%%mm5\n\t" \
+ "pxor %%mm0,%%mm0\n\t" \
+ "pmaddwd %%mm7,%%mm5\n\t" \
+ "pcmpeqw %%mm0,%%mm1\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ "psubw %%mm3,%%mm1\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ "paddd %%mm6,%%mm4\n\t" \
+ "paddd %%mm6,%%mm5\n\t" \
+ /*mm3:mm1=12785*t4''+((t7''!=0)<<16)*/ \
+ "movq %%mm2,%%mm6\n\t" \
+ "movq %%mm2,%%mm3\n\t" \
+ "pmulhw %%mm7,%%mm6\n\t" \
+ "pmullw %%mm7,%%mm3\n\t" \
+ "paddw %%mm1,%%mm6\n\t" \
+ "movq %%mm3,%%mm1\n\t" \
+ "punpckhwd %%mm6,%%mm3\n\t" \
+ "punpcklwd %%mm6,%%mm1\n\t" \
+ /*mm3={-1}x4, mm6={1}x4 \
+   mm4=_y[1]=u=(12785*t4''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \
+ "paddd %%mm3,%%mm5\n\t" \
+ "paddd %%mm1,%%mm4\n\t" \
+ "psrad $16,%%mm5\n\t" \
+ "pxor %%mm6,%%mm6\n\t" \
+ "psrad $16,%%mm4\n\t" \
+ "pcmpeqb %%mm3,%%mm3\n\t" \
+ "packssdw %%mm5,%%mm4\n\t" \
+ "psubw %%mm3,%%mm6\n\t" \
+ /*mm1=t3'', mm7={20539,0x3000}x2 \
+   mm4=s=(12785*u>>16)-t4''*/ \
+ "movq %%mm4,"_r1"(%[y])\n\t" \
+ "pmulhw %%mm7,%%mm4\n\t" \
+ "mov $0x3000503B,%[a]\n\t" \
+ "movq "_r6"(%[y]),%%mm1\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ "psubw %%mm2,%%mm4\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ /*mm6={0x00006CB7}x2 \
+   mm0=(s!=0), mm5:mm4=s*20539+0x3000*/ \
+ "movq %%mm4,%%mm5\n\t" \
+ "movq %%mm4,%%mm2\n\t" \
+ "punpcklwd %%mm6,%%mm4\n\t" \
+ "pcmpeqw %%mm2,%%mm0\n\t" \
+ "pmaddwd %%mm7,%%mm4\n\t" \
+ "mov $0x6CB7,%[a]\n\t" \
+ "punpckhwd %%mm6,%%mm5\n\t" \
+ "movd %[a],%%mm6\n\t" \
+ "pmaddwd %%mm7,%%mm5\n\t" \
+ "psubw %%mm3,%%mm0\n\t" \
+ "punpckldq %%mm6,%%mm6\n\t" \
+ /*mm7={60547-0x7FFF,0x7FFF}x2 \
+   mm2=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \
+ "psrad $20,%%mm4\n\t" \
+ "paddw %%mm0,%%mm2\n\t" \
+ "psrad $20,%%mm5\n\t" \
+ "mov $0x7FFF6C84,%[a]\n\t" \
+ "packssdw %%mm5,%%mm4\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ "paddw %%mm4,%%mm2\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ /*mm0=0, mm7={25080}x4 \
+   mm2=t2'', mm5:mm4=60547*t3''+0x6CB7*/ \
+ "movq %%mm1,%%mm4\n\t" \
+ "movq %%mm1,%%mm5\n\t" \
+ "movq %%mm2,"_r7"(%[y])\n\t" \
+ "punpcklwd %%mm1,%%mm4\n\t" \
+ "movq "_r2"(%[y]),%%mm2\n\t" \
+ "pmaddwd %%mm7,%%mm4\n\t" \
+ "mov $0x61F861F8,%[a]\n\t" \
+ "punpckhwd %%mm1,%%mm5\n\t" \
+ "pxor %%mm0,%%mm0\n\t" \
+ "pmaddwd %%mm7,%%mm5\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ "pcmpeqw %%mm0,%%mm1\n\t" \
+ "psubw %%mm3,%%mm1\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ "paddd %%mm6,%%mm4\n\t" \
+ "paddd %%mm6,%%mm5\n\t" \
+ /*mm3:mm1=25080*t2''+((t3''!=0)<<16)*/ \
+ "movq %%mm2,%%mm6\n\t" \
+ "movq %%mm2,%%mm3\n\t" \
+ "pmulhw %%mm7,%%mm6\n\t" \
+ "pmullw %%mm7,%%mm3\n\t" \
+ "paddw %%mm1,%%mm6\n\t" \
+ "movq %%mm3,%%mm1\n\t" \
+ "punpckhwd %%mm6,%%mm3\n\t" \
+ "punpcklwd %%mm6,%%mm1\n\t" \
+ /*mm1={-1}x4 \
+   mm4=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \
+ "paddd %%mm3,%%mm5\n\t" \
+ "paddd %%mm1,%%mm4\n\t" \
+ "psrad $16,%%mm5\n\t" \
+ "mov $0x28005460,%[a]\n\t" \
+ "psrad $16,%%mm4\n\t" \
+ "pcmpeqb %%mm1,%%mm1\n\t" \
+ "packssdw %%mm5,%%mm4\n\t" \
+ /*mm5={1}x4, mm6=_y[2]=u, mm7={21600,0x2800}x2 \
+   mm4=s=(25080*u>>16)-t2''*/ \
+ "movq %%mm4,%%mm6\n\t" \
+ "pmulhw %%mm7,%%mm4\n\t" \
+ "pxor %%mm5,%%mm5\n\t" \
+ "movd %[a],%%mm7\n\t" \
+ "psubw %%mm1,%%mm5\n\t" \
+ "punpckldq %%mm7,%%mm7\n\t" \
+ "psubw %%mm2,%%mm4\n\t" \
+ /*mm2=s+(s!=0) \
+   mm4:mm3=s*21600+0x2800*/ \
+ "movq %%mm4,%%mm3\n\t" \
+ "movq %%mm4,%%mm2\n\t" \
+ "punpckhwd %%mm5,%%mm4\n\t" \
+ "pcmpeqw %%mm2,%%mm0\n\t" \
+ "pmaddwd %%mm7,%%mm4\n\t" \
+ "psubw %%mm1,%%mm0\n\t" \
+ "punpcklwd %%mm5,%%mm3\n\t" \
+ "paddw %%mm0,%%mm2\n\t" \
+ "pmaddwd %%mm7,%%mm3\n\t" \
+ /*mm0=_y[4], mm1=_y[7], mm4=_y[0], mm5=_y[5] \
+   mm3=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \
+ "movq "_r4"(%[y]),%%mm0\n\t" \
+ "psrad $18,%%mm4\n\t" \
+ "movq "_r5"(%[y]),%%mm5\n\t" \
+ "psrad $18,%%mm3\n\t" \
+ "movq "_r7"(%[y]),%%mm1\n\t" \
+ "packssdw %%mm4,%%mm3\n\t" \
+ "movq "_r0"(%[y]),%%mm4\n\t" \
+ "paddw %%mm2,%%mm3\n\t" \
+
+/*On input, mm4=_y[0], mm6=_y[2], mm0=_y[4], mm5=_y[5], mm3=_y[6], mm1=_y[7].
+  On output, {_y[4],mm1,mm2,mm3} contains the transpose of _y[4...7] and
+   {mm4,mm5,mm6,mm7} contains the transpose of _y[0...3].*/
+# define OC_TRANSPOSE8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) \
+ "#OC_TRANSPOSE8x4\n\t" \
+ /*First 4x4 transpose (rows e...h are _y[4...7]):*/ \
+ /*mm0 = e3 e2 e1 e0 \
+   mm5 = f3 f2 f1 f0 \
+   mm3 = g3 g2 g1 g0 \
+   mm1 = h3 h2 h1 h0*/ \
+ "movq %%mm0,%%mm2\n\t" \
+ "punpcklwd %%mm5,%%mm0\n\t" \
+ "punpckhwd %%mm5,%%mm2\n\t" \
+ "movq %%mm3,%%mm5\n\t" \
+ "punpcklwd %%mm1,%%mm3\n\t" \
+ "punpckhwd %%mm1,%%mm5\n\t" \
+ /*mm0 = f1 e1 f0 e0 \
+   mm2 = f3 e3 f2 e2 \
+   mm3 = h1 g1 h0 g0 \
+   mm5 = h3 g3 h2 g2*/ \
+ "movq %%mm0,%%mm1\n\t" \
+ "punpckldq %%mm3,%%mm0\n\t" \
+ "movq %%mm0,"_r4"(%[y])\n\t" \
+ "punpckhdq %%mm3,%%mm1\n\t" \
+ "movq "_r1"(%[y]),%%mm0\n\t" \
+ "movq %%mm2,%%mm3\n\t" \
+ "punpckldq %%mm5,%%mm2\n\t" \
+ "punpckhdq %%mm5,%%mm3\n\t" \
+ "movq "_r3"(%[y]),%%mm5\n\t" \
+ /*_y[4] = h0 g0 f0 e0 (already stored to _r4 above) \
+    mm1  = h1 g1 f1 e1 \
+    mm2  = h2 g2 f2 e2 \
+    mm3  = h3 g3 f3 e3*/ \
+ /*Second 4x4 transpose (b=_y[1], d=_y[3] were loaded from _r1, _r3 above):*/ \
+ /*mm4 = a3 a2 a1 a0 \
+   mm0 = b3 b2 b1 b0 \
+   mm6 = c3 c2 c1 c0 \
+   mm5 = d3 d2 d1 d0*/ \
+ "movq %%mm4,%%mm7\n\t" \
+ "punpcklwd %%mm0,%%mm4\n\t" \
+ "punpckhwd %%mm0,%%mm7\n\t" \
+ "movq %%mm6,%%mm0\n\t" \
+ "punpcklwd %%mm5,%%mm6\n\t" \
+ "punpckhwd %%mm5,%%mm0\n\t" \
+ /*mm4 = b1 a1 b0 a0 \
+   mm7 = b3 a3 b2 a2 \
+   mm6 = d1 c1 d0 c0 \
+   mm0 = d3 c3 d2 c2*/ \
+ "movq %%mm4,%%mm5\n\t" \
+ "punpckldq %%mm6,%%mm4\n\t" \
+ "punpckhdq %%mm6,%%mm5\n\t" \
+ "movq %%mm7,%%mm6\n\t" \
+ "punpckhdq %%mm0,%%mm7\n\t" \
+ "punpckldq %%mm0,%%mm6\n\t" \
+ /*mm4 = d0 c0 b0 a0 \
+   mm5 = d1 c1 b1 a1 \
+   mm6 = d2 c2 b2 a2 \
+   mm7 = d3 c3 b3 a3 (only _r1, _r3 and _r4 are referenced by this macro)*/ \
+
+/*MMX implementation of the fDCT: four 8x4 transform+transpose passes.*/
+void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  ptrdiff_t a;
+  __asm__ __volatile__(
+    /*Add two extra bits of working precision to improve accuracy; any more and
+       we could overflow.*/
+    /*We also add biases (after <<2: _x[0]+=(_x[0]!=0)+1, _x[1]+=1, _x[8]-=1)
+       to correct for systematic error in the full fDCT->iDCT round trip.*/
+    "movq 0x00(%[x]),%%mm0\n\t"
+    "movq 0x10(%[x]),%%mm1\n\t"
+    "movq 0x20(%[x]),%%mm2\n\t"
+    "movq 0x30(%[x]),%%mm3\n\t"
+    "pcmpeqb %%mm4,%%mm4\n\t"
+    "pxor %%mm7,%%mm7\n\t"
+    "movq %%mm0,%%mm5\n\t"
+    "psllw $2,%%mm0\n\t"
+    "pcmpeqw %%mm7,%%mm5\n\t"
+    "movq 0x70(%[x]),%%mm7\n\t"
+    "psllw $2,%%mm1\n\t"
+    "psubw %%mm4,%%mm5\n\t"
+    "psllw $2,%%mm2\n\t"
+    "mov $1,%[a]\n\t"
+    "pslld $16,%%mm5\n\t"
+    "movd %[a],%%mm6\n\t"
+    "psllq $16,%%mm5\n\t"
+    "mov $0x10001,%[a]\n\t"
+    "psllw $2,%%mm3\n\t"
+    "movd %[a],%%mm4\n\t"
+    "punpckhwd %%mm6,%%mm5\n\t"
+    "psubw %%mm6,%%mm1\n\t"
+    "movq 0x60(%[x]),%%mm6\n\t"
+    "paddw %%mm5,%%mm0\n\t"
+    "movq 0x50(%[x]),%%mm5\n\t"
+    "paddw %%mm4,%%mm0\n\t"
+    "movq 0x40(%[x]),%%mm4\n\t"
+    /*We inline stage1 of the transform here so we can get better instruction
+       scheduling with the shifts.*/
+    /*mm0=t7'=t0-t7*/
+    "psllw $2,%%mm7\n\t"
+    "psubw %%mm7,%%mm0\n\t"
+    "psllw $2,%%mm6\n\t"
+    "paddw %%mm7,%%mm7\n\t"
+    /*mm1=t6'=t1-t6*/
+    "psllw $2,%%mm5\n\t"
+    "psubw %%mm6,%%mm1\n\t"
+    "psllw $2,%%mm4\n\t"
+    "paddw %%mm6,%%mm6\n\t"
+    /*mm2=t5'=t2-t5*/
+    "psubw %%mm5,%%mm2\n\t"
+    "paddw %%mm5,%%mm5\n\t"
+    /*mm3=t4'=t3-t4*/
+    "psubw %%mm4,%%mm3\n\t"
+    "paddw %%mm4,%%mm4\n\t"
+    /*mm7=t0'=t0+t7*/
+    "paddw %%mm0,%%mm7\n\t"
+    /*mm6=t1'=t1+t6*/
+    "paddw %%mm1,%%mm6\n\t"
+    /*mm5=t2'=t2+t5*/
+    "paddw %%mm2,%%mm5\n\t"
+    /*mm4=t3'=t3+t4*/
+    "paddw %%mm3,%%mm4\n\t"
+    OC_FDCT8x4("0x00","0x10","0x20","0x30","0x40","0x50","0x60","0x70")
+    OC_TRANSPOSE8x4("0x00","0x10","0x20","0x30","0x40","0x50","0x60","0x70")
+    /*Swap out this 8x4 block for the next one.*/
+    "movq 0x08(%[x]),%%mm0\n\t"
+    "movq %%mm7,0x30(%[y])\n\t"
+    "movq 0x78(%[x]),%%mm7\n\t"
+    "movq %%mm1,0x50(%[y])\n\t"
+    "movq 0x18(%[x]),%%mm1\n\t"
+    "movq %%mm6,0x20(%[y])\n\t"
+    "movq 0x68(%[x]),%%mm6\n\t"
+    "movq %%mm2,0x60(%[y])\n\t"
+    "movq 0x28(%[x]),%%mm2\n\t"
+    "movq %%mm5,0x10(%[y])\n\t"
+    "movq 0x58(%[x]),%%mm5\n\t"
+    "movq %%mm3,0x70(%[y])\n\t"
+    "movq 0x38(%[x]),%%mm3\n\t"
+    /*And increase its working precision, too.*/
+    "psllw $2,%%mm0\n\t"
+    "movq %%mm4,0x00(%[y])\n\t"
+    "psllw $2,%%mm7\n\t"
+    "movq 0x48(%[x]),%%mm4\n\t"
+    /*We inline stage1 of the transform here so we can get better instruction
+       scheduling with the shifts.*/
+    /*mm0=t7'=t0-t7*/
+    "psubw %%mm7,%%mm0\n\t"
+    "psllw $2,%%mm1\n\t"
+    "paddw %%mm7,%%mm7\n\t"
+    "psllw $2,%%mm6\n\t"
+    /*mm1=t6'=t1-t6*/
+    "psubw %%mm6,%%mm1\n\t"
+    "psllw $2,%%mm2\n\t"
+    "paddw %%mm6,%%mm6\n\t"
+    "psllw $2,%%mm5\n\t"
+    /*mm2=t5'=t2-t5*/
+    "psubw %%mm5,%%mm2\n\t"
+    "psllw $2,%%mm3\n\t"
+    "paddw %%mm5,%%mm5\n\t"
+    "psllw $2,%%mm4\n\t"
+    /*mm3=t4'=t3-t4*/
+    "psubw %%mm4,%%mm3\n\t"
+    "paddw %%mm4,%%mm4\n\t"
+    /*mm7=t0'=t0+t7*/
+    "paddw %%mm0,%%mm7\n\t"
+    /*mm6=t1'=t1+t6*/
+    "paddw %%mm1,%%mm6\n\t"
+    /*mm5=t2'=t2+t5*/
+    "paddw %%mm2,%%mm5\n\t"
+    /*mm4=t3'=t3+t4*/
+    "paddw %%mm3,%%mm4\n\t"
+    OC_FDCT8x4("0x08","0x18","0x28","0x38","0x48","0x58","0x68","0x78")
+    OC_TRANSPOSE8x4("0x08","0x18","0x28","0x38","0x48","0x58","0x68","0x78")
+    /*Here the first 4x4 block of output from the last transpose is the second
+       4x4 block of input for the next transform.
+      We have cleverly arranged that it already be in the appropriate place,
+       so we only have to do half the stores and loads.*/
+    "movq 0x00(%[y]),%%mm0\n\t"
+    "movq %%mm1,0x58(%[y])\n\t"
+    "movq 0x10(%[y]),%%mm1\n\t"
+    "movq %%mm2,0x68(%[y])\n\t"
+    "movq 0x20(%[y]),%%mm2\n\t"
+    "movq %%mm3,0x78(%[y])\n\t"
+    "movq 0x30(%[y]),%%mm3\n\t"
+    OC_FDCT_STAGE1_8x4
+    OC_FDCT8x4("0x00","0x10","0x20","0x30","0x08","0x18","0x28","0x38")
+    OC_TRANSPOSE8x4("0x00","0x10","0x20","0x30","0x08","0x18","0x28","0x38")
+    /*mm0={-2}x4*/
+    "pcmpeqw %%mm0,%%mm0\n\t"
+    "paddw %%mm0,%%mm0\n\t"
+    /*Round the results and drop the two extra bits: y=(y+2)>>2.*/
+    "psubw %%mm0,%%mm1\n\t"
+    "psubw %%mm0,%%mm2\n\t"
+    "psraw $2,%%mm1\n\t"
+    "psubw %%mm0,%%mm3\n\t"
+    "movq %%mm1,0x18(%[y])\n\t"
+    "psraw $2,%%mm2\n\t"
+    "psubw %%mm0,%%mm4\n\t"
+    "movq 0x08(%[y]),%%mm1\n\t"
+    "psraw $2,%%mm3\n\t"
+    "psubw %%mm0,%%mm5\n\t"
+    "psraw $2,%%mm4\n\t"
+    "psubw %%mm0,%%mm6\n\t"
+    "psraw $2,%%mm5\n\t"
+    "psubw %%mm0,%%mm7\n\t"
+    "psraw $2,%%mm6\n\t"
+    "psubw %%mm0,%%mm1\n\t"
+    "psraw $2,%%mm7\n\t"
+    "movq 0x40(%[y]),%%mm0\n\t"
+    "psraw $2,%%mm1\n\t"
+    "movq %%mm7,0x30(%[y])\n\t"
+    "movq 0x78(%[y]),%%mm7\n\t"
+    "movq %%mm1,0x08(%[y])\n\t"
+    "movq 0x50(%[y]),%%mm1\n\t"
+    "movq %%mm6,0x20(%[y])\n\t"
+    "movq 0x68(%[y]),%%mm6\n\t"
+    "movq %%mm2,0x28(%[y])\n\t"
+    "movq 0x60(%[y]),%%mm2\n\t"
+    "movq %%mm5,0x10(%[y])\n\t"
+    "movq 0x58(%[y]),%%mm5\n\t"
+    "movq %%mm3,0x38(%[y])\n\t"
+    "movq 0x70(%[y]),%%mm3\n\t"
+    "movq %%mm4,0x00(%[y])\n\t"
+    "movq 0x48(%[y]),%%mm4\n\t"
+    OC_FDCT_STAGE1_8x4
+    OC_FDCT8x4("0x40","0x50","0x60","0x70","0x48","0x58","0x68","0x78")
+    OC_TRANSPOSE8x4("0x40","0x50","0x60","0x70","0x48","0x58","0x68","0x78")
+    /*mm0={-2}x4*/
+    "pcmpeqw %%mm0,%%mm0\n\t"
+    "paddw %%mm0,%%mm0\n\t"
+    /*Round the results and drop the two extra bits: y=(y+2)>>2.*/
+    "psubw %%mm0,%%mm1\n\t"
+    "psubw %%mm0,%%mm2\n\t"
+    "psraw $2,%%mm1\n\t"
+    "psubw %%mm0,%%mm3\n\t"
+    "movq %%mm1,0x58(%[y])\n\t"
+    "psraw $2,%%mm2\n\t"
+    "psubw %%mm0,%%mm4\n\t"
+    "movq 0x48(%[y]),%%mm1\n\t"
+    "psraw $2,%%mm3\n\t"
+    "psubw %%mm0,%%mm5\n\t"
+    "movq %%mm2,0x68(%[y])\n\t"
+    "psraw $2,%%mm4\n\t"
+    "psubw %%mm0,%%mm6\n\t"
+    "movq %%mm3,0x78(%[y])\n\t"
+    "psraw $2,%%mm5\n\t"
+    "psubw %%mm0,%%mm7\n\t"
+    "movq %%mm4,0x40(%[y])\n\t"
+    "psraw $2,%%mm6\n\t"
+    "psubw %%mm0,%%mm1\n\t"
+    "movq %%mm5,0x50(%[y])\n\t"
+    "psraw $2,%%mm7\n\t"
+    "movq %%mm6,0x60(%[y])\n\t"
+    "psraw $2,%%mm1\n\t"
+    "movq %%mm7,0x70(%[y])\n\t"
+    "movq %%mm1,0x48(%[y])\n\t"
+    :[a]"=&r"(a)
+    :[y]"r"(_y),[x]"r"(_x)
+    :"memory"
+  );
+}
+
+#endif

Copied: trunk/theora/lib/x86/sse2fdct.c (from rev 16442, trunk/theora/lib/enc/x86/sse2fdct.c)
===================================================================
--- trunk/theora/lib/x86/sse2fdct.c	                        (rev 0)
+++ trunk/theora/lib/x86/sse2fdct.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,523 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************/
+/*SSE2 fDCT implementation for x86_64.*/
+/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/
+#include <stddef.h>
+#include "x86enc.h"
+
+#if defined(OC_X86_64_ASM)
+
+# define OC_FDCT8x8 \
+ /*Note: needs xmm15={0}x8, xmm14={-1}x8; xmm8...xmm13, %[a] are scratch.*/ \
+ "#OC_FDCT8x8\n\t" \
+ /*Stage 1:*/ \
+ "movdqa %%xmm0,%%xmm11\n\t" \
+ "movdqa %%xmm1,%%xmm10\n\t" \
+ "movdqa %%xmm2,%%xmm9\n\t" \
+ "movdqa %%xmm3,%%xmm8\n\t" \
+ /*xmm11=t7'=t0-t7*/ \
+ "psubw %%xmm7,%%xmm11\n\t" \
+ /*xmm10=t6'=t1-t6*/ \
+ "psubw %%xmm6,%%xmm10\n\t" \
+ /*xmm9=t5'=t2-t5*/ \
+ "psubw %%xmm5,%%xmm9\n\t" \
+ /*xmm8=t4'=t3-t4*/ \
+ "psubw %%xmm4,%%xmm8\n\t" \
+ /*xmm0=t0'=t0+t7*/ \
+ "paddw %%xmm7,%%xmm0\n\t" \
+ /*xmm1=t1'=t1+t6*/ \
+ "paddw %%xmm6,%%xmm1\n\t" \
+ /*xmm5=t2'=t2+t5*/ \
+ "paddw %%xmm2,%%xmm5\n\t" \
+ /*xmm4=t3'=t3+t4*/ \
+ "paddw %%xmm3,%%xmm4\n\t" \
+ /*xmm2,3,6,7 are now free.*/ \
+ /*Stage 2:*/ \
+ "movdqa %%xmm0,%%xmm3\n\t" \
+ "mov $0x5A806A0A,%[a]\n\t" \
+ "movdqa %%xmm1,%%xmm2\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "movdqa %%xmm10,%%xmm6\n\t" \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ /*xmm2=t2''=t1'-t2'*/ \
+ "psubw %%xmm5,%%xmm2\n\t" \
+ "pxor %%xmm12,%%xmm12\n\t" \
+ /*xmm3=t3''=t0'-t3'*/ \
+ "psubw %%xmm4,%%xmm3\n\t" \
+ "psubw %%xmm14,%%xmm12\n\t" \
+ /*xmm10=t5''=t6'-t5'*/ \
+ "psubw %%xmm9,%%xmm10\n\t" \
+ "paddw %%xmm12,%%xmm12\n\t" \
+ /*xmm4=t0''=t0'+t3'*/ \
+ "paddw %%xmm0,%%xmm4\n\t" \
+ /*xmm1=t1''=t1'+t2'*/ \
+ "paddw %%xmm5,%%xmm1\n\t" \
+ /*xmm6=t6''=t6'+t5'*/ \
+ "paddw %%xmm9,%%xmm6\n\t" \
+ /*xmm0,xmm5,xmm9 are now free.*/ \
+ /*Stage 3:*/ \
+ /*xmm10:xmm5=t5''*27146+0xB500 (0xB500=2*0x5A80, via xmm12={2}x8) \
+   xmm0=t5''*/ \
+ "movdqa %%xmm10,%%xmm5\n\t" \
+ "movdqa %%xmm10,%%xmm0\n\t" \
+ "punpckhwd %%xmm12,%%xmm10\n\t" \
+ "pmaddwd %%xmm13,%%xmm10\n\t" \
+ "punpcklwd %%xmm12,%%xmm5\n\t" \
+ "pmaddwd %%xmm13,%%xmm5\n\t" \
+ /*xmm5=(t5''*27146+0xB500>>16)+t5''*/ \
+ "psrad $16,%%xmm10\n\t" \
+ "psrad $16,%%xmm5\n\t" \
+ "packssdw %%xmm10,%%xmm5\n\t" \
+ "paddw %%xmm0,%%xmm5\n\t" \
+ /*xmm0=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \
+ "pcmpeqw %%xmm15,%%xmm0\n\t" \
+ "psubw %%xmm14,%%xmm0\n\t" \
+ "paddw %%xmm5,%%xmm0\n\t" \
+ "movdqa %%xmm8,%%xmm5\n\t" \
+ "psraw $1,%%xmm0\n\t" \
+ /*xmm5=t5'''=t4'-s*/ \
+ "psubw %%xmm0,%%xmm5\n\t" \
+ /*xmm8=t4''=t4'+s*/ \
+ "paddw %%xmm0,%%xmm8\n\t" \
+ /*xmm0,xmm7,xmm9,xmm10 are free.*/ \
+ /*xmm7:xmm9=t6''*27146+0xB500*/ \
+ "movdqa %%xmm6,%%xmm7\n\t" \
+ "movdqa %%xmm6,%%xmm9\n\t" \
+ "punpckhwd %%xmm12,%%xmm7\n\t" \
+ "pmaddwd %%xmm13,%%xmm7\n\t" \
+ "punpcklwd %%xmm12,%%xmm9\n\t" \
+ "pmaddwd %%xmm13,%%xmm9\n\t" \
+ /*xmm9=(t6''*27146+0xB500>>16)+t6''*/ \
+ "psrad $16,%%xmm7\n\t" \
+ "psrad $16,%%xmm9\n\t" \
+ "packssdw %%xmm7,%%xmm9\n\t" \
+ "paddw %%xmm6,%%xmm9\n\t" \
+ /*xmm9=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \
+ "pcmpeqw %%xmm15,%%xmm6\n\t" \
+ "psubw %%xmm14,%%xmm6\n\t" \
+ "paddw %%xmm6,%%xmm9\n\t" \
+ "movdqa %%xmm11,%%xmm7\n\t" \
+ "psraw $1,%%xmm9\n\t" \
+ /*xmm7=t6'''=t7'-s*/ \
+ "psubw %%xmm9,%%xmm7\n\t" \
+ /*xmm9=t7''=t7'+s*/ \
+ "paddw %%xmm11,%%xmm9\n\t" \
+ /*xmm0,xmm6,xmm10,xmm11 are free.*/ \
+ /*Stage 4:*/ \
+ /*xmm10:xmm0=t1''*27146+0xB500*/ \
+ "movdqa %%xmm1,%%xmm0\n\t" \
+ "movdqa %%xmm1,%%xmm10\n\t" \
+ "punpcklwd %%xmm12,%%xmm0\n\t" \
+ "pmaddwd %%xmm13,%%xmm0\n\t" \
+ "punpckhwd %%xmm12,%%xmm10\n\t" \
+ "pmaddwd %%xmm13,%%xmm10\n\t" \
+ /*xmm0=(t1''*27146+0xB500>>16)+t1''*/ \
+ "psrad $16,%%xmm0\n\t" \
+ "psrad $16,%%xmm10\n\t" \
+ "mov $0x20006A0A,%[a]\n\t" \
+ "packssdw %%xmm10,%%xmm0\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "paddw %%xmm1,%%xmm0\n\t" \
+ /*xmm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \
+ "pcmpeqw %%xmm15,%%xmm1\n\t" \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ "psubw %%xmm14,%%xmm1\n\t" \
+ "paddw %%xmm1,%%xmm0\n\t" \
+ /*xmm10:xmm4=t0''*27146+0x4000*/ \
+ "movdqa %%xmm4,%%xmm1\n\t" \
+ "movdqa %%xmm4,%%xmm10\n\t" \
+ "punpcklwd %%xmm12,%%xmm4\n\t" \
+ "pmaddwd %%xmm13,%%xmm4\n\t" \
+ "punpckhwd %%xmm12,%%xmm10\n\t" \
+ "pmaddwd %%xmm13,%%xmm10\n\t" \
+ /*xmm4=(t0''*27146+0x4000>>16)+t0''*/ \
+ "psrad $16,%%xmm4\n\t" \
+ "psrad $16,%%xmm10\n\t" \
+ "mov $0x6CB7,%[a]\n\t" \
+ "packssdw %%xmm10,%%xmm4\n\t" \
+ "movd %[a],%%xmm12\n\t" \
+ "paddw %%xmm1,%%xmm4\n\t" \
+ /*xmm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \
+ "pcmpeqw %%xmm15,%%xmm1\n\t" \
+ "pshufd $00,%%xmm12,%%xmm12\n\t" \
+ "psubw %%xmm14,%%xmm1\n\t" \
+ "mov $0x7FFF6C84,%[a]\n\t" \
+ "paddw %%xmm1,%%xmm4\n\t" \
+ /*xmm0=_y[0]=u=r+s>>1 \
+   The naive implementation could cause overflow, so we use \
+    u=(r&s)+((r^s)>>1).*/ \
+ "movdqa %%xmm0,%%xmm6\n\t" \
+ "pxor %%xmm4,%%xmm0\n\t" \
+ "pand %%xmm4,%%xmm6\n\t" \
+ "psraw $1,%%xmm0\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "paddw %%xmm6,%%xmm0\n\t" \
+ /*xmm4=_y[4]=v=r-u*/ \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ "psubw %%xmm0,%%xmm4\n\t" \
+ /*xmm1,xmm6,xmm10,xmm11 are free.*/ \
+ /*xmm6:xmm10=60547*t3''+0x6CB7*/ \
+ "movdqa %%xmm3,%%xmm10\n\t" \
+ "movdqa %%xmm3,%%xmm6\n\t" \
+ "punpcklwd %%xmm3,%%xmm10\n\t" \
+ "pmaddwd %%xmm13,%%xmm10\n\t" \
+ "mov $0x61F861F8,%[a]\n\t" \
+ "punpckhwd %%xmm3,%%xmm6\n\t" \
+ "pmaddwd %%xmm13,%%xmm6\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "paddd %%xmm12,%%xmm10\n\t" \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ "paddd %%xmm12,%%xmm6\n\t" \
+ /*xmm1:xmm2=25080*t2'' \
+   xmm12=t2''*/ \
+ "movdqa %%xmm2,%%xmm11\n\t" \
+ "movdqa %%xmm2,%%xmm12\n\t" \
+ "pmullw %%xmm13,%%xmm2\n\t" \
+ "pmulhw %%xmm13,%%xmm11\n\t" \
+ "movdqa %%xmm2,%%xmm1\n\t" \
+ "punpcklwd %%xmm11,%%xmm2\n\t" \
+ "punpckhwd %%xmm11,%%xmm1\n\t" \
+ /*xmm10=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \
+ "paddd %%xmm2,%%xmm10\n\t" \
+ "paddd %%xmm1,%%xmm6\n\t" \
+ "psrad $16,%%xmm10\n\t" \
+ "pcmpeqw %%xmm15,%%xmm3\n\t" \
+ "psrad $16,%%xmm6\n\t" \
+ "psubw %%xmm14,%%xmm3\n\t" \
+ "packssdw %%xmm6,%%xmm10\n\t" \
+ "paddw %%xmm3,%%xmm10\n\t" \
+ /*xmm2=_y[2]=u \
+   xmm10=s=(25080*u>>16)-t2''*/ \
+ "movdqa %%xmm10,%%xmm2\n\t" \
+ "pmulhw %%xmm13,%%xmm10\n\t" \
+ "psubw %%xmm12,%%xmm10\n\t" \
+ /*xmm1:xmm6=s*21600+0x2800*/ \
+ "pxor %%xmm12,%%xmm12\n\t" \
+ "psubw %%xmm14,%%xmm12\n\t" \
+ "mov $0x28005460,%[a]\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ "movdqa %%xmm10,%%xmm6\n\t" \
+ "movdqa %%xmm10,%%xmm1\n\t" \
+ "punpcklwd %%xmm12,%%xmm6\n\t" \
+ "pmaddwd %%xmm13,%%xmm6\n\t" \
+ "mov $0x0E3D,%[a]\n\t" \
+ "punpckhwd %%xmm12,%%xmm1\n\t" \
+ "pmaddwd %%xmm13,%%xmm1\n\t" \
+ /*xmm6=(s*21600+0x2800>>18)+s*/ \
+ "psrad $18,%%xmm6\n\t" \
+ "psrad $18,%%xmm1\n\t" \
+ "movd %[a],%%xmm12\n\t" \
+ "packssdw %%xmm1,%%xmm6\n\t" \
+ "pshufd $00,%%xmm12,%%xmm12\n\t" \
+ "paddw %%xmm10,%%xmm6\n\t" \
+ /*xmm6=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \
+ "mov $0x7FFF54DC,%[a]\n\t" \
+ "pcmpeqw %%xmm15,%%xmm10\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "psubw %%xmm14,%%xmm10\n\t" \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ "paddw %%xmm10,%%xmm6\n\t " \
+ /*xmm1,xmm3,xmm10,xmm11 are free.*/ \
+ /*xmm11:xmm10=54491*t5'''+0x0E3D*/ \
+ "movdqa %%xmm5,%%xmm10\n\t" \
+ "movdqa %%xmm5,%%xmm11\n\t" \
+ "punpcklwd %%xmm5,%%xmm10\n\t" \
+ "pmaddwd %%xmm13,%%xmm10\n\t" \
+ "mov $0x8E3A8E3A,%[a]\n\t" \
+ "punpckhwd %%xmm5,%%xmm11\n\t" \
+ "pmaddwd %%xmm13,%%xmm11\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "paddd %%xmm12,%%xmm10\n\t" \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ "paddd %%xmm12,%%xmm11\n\t" \
+ /*xmm7:xmm12=36410*t6''' \
+   xmm1=t6'''*/ \
+ "movdqa %%xmm7,%%xmm3\n\t" \
+ "movdqa %%xmm7,%%xmm1\n\t" \
+ "pmulhw %%xmm13,%%xmm3\n\t" \
+ "pmullw %%xmm13,%%xmm7\n\t" \
+ "paddw %%xmm1,%%xmm3\n\t" \
+ "movdqa %%xmm7,%%xmm12\n\t" \
+ "punpckhwd %%xmm3,%%xmm7\n\t" \
+ "punpcklwd %%xmm3,%%xmm12\n\t" \
+ /*xmm10=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \
+ "paddd %%xmm12,%%xmm10\n\t" \
+ "paddd %%xmm7,%%xmm11\n\t" \
+ "psrad $16,%%xmm10\n\t" \
+ "pcmpeqw %%xmm15,%%xmm5\n\t" \
+ "psrad $16,%%xmm11\n\t" \
+ "psubw %%xmm14,%%xmm5\n\t" \
+ "packssdw %%xmm11,%%xmm10\n\t" \
+ "pxor %%xmm12,%%xmm12\n\t" \
+ "paddw %%xmm5,%%xmm10\n\t" \
+ /*xmm5=_y[5]=u \
+   xmm1=s=t6'''-(36410*u>>16)*/ \
+ "psubw %%xmm14,%%xmm12\n\t" \
+ "movdqa %%xmm10,%%xmm5\n\t" \
+ "mov $0x340067C8,%[a]\n\t" \
+ "pmulhw %%xmm13,%%xmm10\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "paddw %%xmm5,%%xmm10\n\t" \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ "psubw %%xmm10,%%xmm1\n\t" \
+ /*xmm11:xmm3=s*26568+0x3400*/ \
+ "movdqa %%xmm1,%%xmm3\n\t" \
+ "movdqa %%xmm1,%%xmm11\n\t" \
+ "punpcklwd %%xmm12,%%xmm3\n\t" \
+ "pmaddwd %%xmm13,%%xmm3\n\t" \
+ "mov $0x7B1B,%[a]\n\t" \
+ "punpckhwd %%xmm12,%%xmm11\n\t" \
+ "pmaddwd %%xmm13,%%xmm11\n\t" \
+ /*xmm3=(s*26568+0x3400>>17)+s*/ \
+ "psrad $17,%%xmm3\n\t" \
+ "psrad $17,%%xmm11\n\t" \
+ "movd %[a],%%xmm12\n\t" \
+ "packssdw %%xmm11,%%xmm3\n\t" \
+ "pshufd $00,%%xmm12,%%xmm12\n\t" \
+ "paddw %%xmm1,%%xmm3\n\t" \
+ /*xmm3=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \
+ "mov $0x7FFF7B16,%[a]\n\t" \
+ "pcmpeqw %%xmm15,%%xmm1\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "psubw %%xmm14,%%xmm1\n\t" \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ "paddw %%xmm1,%%xmm3\n\t " \
+ /*xmm1,xmm7,xmm10,xmm11 are free.*/ \
+ /*xmm11:xmm10=64277*t7''+0x7B1B*/ \
+ "movdqa %%xmm9,%%xmm10\n\t" \
+ "movdqa %%xmm9,%%xmm11\n\t" \
+ "punpcklwd %%xmm9,%%xmm10\n\t" \
+ "pmaddwd %%xmm13,%%xmm10\n\t" \
+ "mov $0x31F131F1,%[a]\n\t" \
+ "punpckhwd %%xmm9,%%xmm11\n\t" \
+ "pmaddwd %%xmm13,%%xmm11\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "paddd %%xmm12,%%xmm10\n\t" \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ "paddd %%xmm12,%%xmm11\n\t" \
+ /*xmm12:xmm7=12785*t4''*/ \
+ "movdqa %%xmm8,%%xmm7\n\t" \
+ "movdqa %%xmm8,%%xmm1\n\t" \
+ "pmullw %%xmm13,%%xmm7\n\t" \
+ "pmulhw %%xmm13,%%xmm1\n\t" \
+ "movdqa %%xmm7,%%xmm12\n\t" \
+ "punpcklwd %%xmm1,%%xmm7\n\t" \
+ "punpckhwd %%xmm1,%%xmm12\n\t" \
+ /*xmm10=u=(12785*t4''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \
+ "paddd %%xmm7,%%xmm10\n\t" \
+ "paddd %%xmm12,%%xmm11\n\t" \
+ "psrad $16,%%xmm10\n\t" \
+ "pcmpeqw %%xmm15,%%xmm9\n\t" \
+ "psrad $16,%%xmm11\n\t" \
+ "psubw %%xmm14,%%xmm9\n\t" \
+ "packssdw %%xmm11,%%xmm10\n\t" \
+ "pxor %%xmm12,%%xmm12\n\t" \
+ "paddw %%xmm9,%%xmm10\n\t" \
+ /*xmm1=_y[1]=u \
+   xmm10=s=(12785*u>>16)-t4''*/ \
+ "psubw %%xmm14,%%xmm12\n\t" \
+ "movdqa %%xmm10,%%xmm1\n\t" \
+ "mov $0x3000503B,%[a]\n\t" \
+ "pmulhw %%xmm13,%%xmm10\n\t" \
+ "movd %[a],%%xmm13\n\t" \
+ "psubw %%xmm8,%%xmm10\n\t" \
+ "pshufd $00,%%xmm13,%%xmm13\n\t" \
+ /*xmm8:xmm7=s*20539+0x3000*/ \
+ "movdqa %%xmm10,%%xmm7\n\t" \
+ "movdqa %%xmm10,%%xmm8\n\t" \
+ "punpcklwd %%xmm12,%%xmm7\n\t" \
+ "pmaddwd %%xmm13,%%xmm7\n\t" \
+ "punpckhwd %%xmm12,%%xmm8\n\t" \
+ "pmaddwd %%xmm13,%%xmm8\n\t" \
+ /*xmm7=(s*20539+0x3000>>20)+s*/ \
+ "psrad $20,%%xmm7\n\t" \
+ "psrad $20,%%xmm8\n\t" \
+ "packssdw %%xmm8,%%xmm7\n\t" \
+ "paddw %%xmm10,%%xmm7\n\t" \
+ /*xmm7=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \
+ "pcmpeqw %%xmm15,%%xmm10\n\t" \
+ "psubw %%xmm14,%%xmm10\n\t" \
+ "paddw %%xmm10,%%xmm7\n\t " \
+
+# define OC_TRANSPOSE8x8 \
+ "#OC_TRANSPOSE8x8\n\t" \
+ "movdqa %%xmm4,%%xmm8\n\t" \
+ /*Rows a...h start in xmm0...xmm7. xmm4 = f3 e3 f2 e2 f1 e1 f0 e0*/ \
+ "punpcklwd %%xmm5,%%xmm4\n\t" \
+ /*xmm8 = f7 e7 f6 e6 f5 e5 f4 e4*/ \
+ "punpckhwd %%xmm5,%%xmm8\n\t" \
+ /*xmm5 is free.*/ \
+ "movdqa %%xmm0,%%xmm5\n\t" \
+ /*xmm0 = b3 a3 b2 a2 b1 a1 b0 a0*/ \
+ "punpcklwd %%xmm1,%%xmm0\n\t" \
+ /*xmm5 = b7 a7 b6 a6 b5 a5 b4 a4*/ \
+ "punpckhwd %%xmm1,%%xmm5\n\t" \
+ /*xmm1 is free.*/ \
+ "movdqa %%xmm6,%%xmm1\n\t" \
+ /*xmm6 = h3 g3 h2 g2 h1 g1 h0 g0*/ \
+ "punpcklwd %%xmm7,%%xmm6\n\t" \
+ /*xmm1 = h7 g7 h6 g6 h5 g5 h4 g4*/ \
+ "punpckhwd %%xmm7,%%xmm1\n\t" \
+ /*xmm7 is free.*/ \
+ "movdqa %%xmm2,%%xmm7\n\t" \
+ /*xmm7 = d3 c3 d2 c2 d1 c1 d0 c0*/ \
+ "punpcklwd %%xmm3,%%xmm7\n\t" \
+ /*xmm2 = d7 c7 d6 c6 d5 c5 d4 c4*/ \
+ "punpckhwd %%xmm3,%%xmm2\n\t" \
+ /*xmm3 is free.*/ \
+ "movdqa %%xmm0,%%xmm3\n\t" \
+ /*xmm0 = d1 c1 b1 a1 d0 c0 b0 a0*/ \
+ "punpckldq %%xmm7,%%xmm0\n\t" \
+ /*xmm3 = d3 c3 b3 a3 d2 c2 b2 a2*/ \
+ "punpckhdq %%xmm7,%%xmm3\n\t" \
+ /*xmm7 is free.*/ \
+ "movdqa %%xmm5,%%xmm7\n\t" \
+ /*xmm5 = d5 c5 b5 a5 d4 c4 b4 a4*/ \
+ "punpckldq %%xmm2,%%xmm5\n\t" \
+ /*xmm7 = d7 c7 b7 a7 d6 c6 b6 a6*/ \
+ "punpckhdq %%xmm2,%%xmm7\n\t" \
+ /*xmm2 is free.*/ \
+ "movdqa %%xmm4,%%xmm2\n\t" \
+ /*xmm2 = h1 g1 f1 e1 h0 g0 f0 e0*/ \
+ "punpckldq %%xmm6,%%xmm2\n\t" \
+ /*xmm4 = h3 g3 f3 e3 h2 g2 f2 e2*/ \
+ "punpckhdq %%xmm6,%%xmm4\n\t" \
+ /*xmm6 is free.*/ \
+ "movdqa %%xmm8,%%xmm6\n\t" \
+ /*xmm6 = h5 g5 f5 e5 h4 g4 f4 e4*/ \
+ "punpckldq %%xmm1,%%xmm6\n\t" \
+ /*xmm8 = h7 g7 f7 e7 h6 g6 f6 e6*/ \
+ "punpckhdq %%xmm1,%%xmm8\n\t" \
+ /*xmm1 is free.*/ \
+ "movdqa %%xmm0,%%xmm1\n\t" \
+ /*xmm0 = h0 g0 f0 e0 d0 c0 b0 a0*/ \
+ "punpcklqdq %%xmm2,%%xmm0\n\t" \
+ /*xmm1 = h1 g1 f1 e1 d1 c1 b1 a1*/ \
+ "punpckhqdq %%xmm2,%%xmm1\n\t" \
+ /*xmm2 is free.*/ \
+ "movdqa %%xmm3,%%xmm2\n\t" \
+ /*xmm2 = h2 g2 f2 e2 d2 c2 b2 a2*/ \
+ "punpcklqdq %%xmm4,%%xmm2\n\t" \
+ /*xmm3 = h3 g3 f3 e3 d3 c3 b3 a3*/ \
+ "punpckhqdq %%xmm4,%%xmm3\n\t" \
+ /*xmm4 is free.*/ \
+ "movdqa %%xmm5,%%xmm4\n\t" \
+ /*xmm4 = h4 g4 f4 e4 d4 c4 b4 a4*/ \
+ "punpcklqdq %%xmm6,%%xmm4\n\t" \
+ /*xmm5 = h5 g5 f5 e5 d5 c5 b5 a5*/ \
+ "punpckhqdq %%xmm6,%%xmm5\n\t" \
+ /*xmm6 is free.*/ \
+ "movdqa %%xmm7,%%xmm6\n\t" \
+ /*xmm6 = h6 g6 f6 e6 d6 c6 b6 a6*/ \
+ "punpcklqdq %%xmm8,%%xmm6\n\t" \
+ /*xmm7 = h7 g7 f7 e7 d7 c7 b7 a7*/ \
+ "punpckhqdq %%xmm8,%%xmm7\n\t" \
+ /*xmm8 is free; xmm0...xmm7 now hold the transposed rows.*/ \
+
+/*SSE2 fDCT for x86-64 only; movdqa needs 16-byte aligned _x and _y.
+  Because of the 8 extra XMM registers on x86-64, this version can operate
+   without any temporary stack access at all.*/
+void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  ptrdiff_t a;
+  __asm__ __volatile__(
+    /*Load the input.*/
+    "movdqa 0x00(%[x]),%%xmm0\n\t"
+    "movdqa 0x10(%[x]),%%xmm1\n\t"
+    "movdqa 0x20(%[x]),%%xmm2\n\t"
+    "movdqa 0x30(%[x]),%%xmm3\n\t"
+    "movdqa 0x40(%[x]),%%xmm4\n\t"
+    "movdqa 0x50(%[x]),%%xmm5\n\t"
+    "movdqa 0x60(%[x]),%%xmm6\n\t"
+    "movdqa 0x70(%[x]),%%xmm7\n\t"
+    /*Add two extra bits of working precision to improve accuracy; any more and
+       we could overflow.*/
+    /*We also add a few biases to correct for some systematic error that
+       remains in the full fDCT->iDCT round trip.*/
+    /*xmm15={0}x8*/
+    "pxor %%xmm15,%%xmm15\n\t"
+    /*xmm14={-1}x8*/
+    "pcmpeqb %%xmm14,%%xmm14\n\t"
+    "psllw $2,%%xmm0\n\t"
+    /*xmm8=xmm0*/
+    "movdqa %%xmm0,%%xmm8\n\t"
+    "psllw $2,%%xmm1\n\t"
+    /*xmm8={_x[7...0]==0}*/
+    "pcmpeqw %%xmm15,%%xmm8\n\t"
+    "psllw $2,%%xmm2\n\t"
+    /*xmm8={_x[7...0]!=0}*/
+    "psubw %%xmm14,%%xmm8\n\t"
+    "psllw $2,%%xmm3\n\t"
+    /*%[a]=1*/
+    "mov $1,%[a]\n\t"
+    /*xmm8={_x[6]!=0,0,_x[4]!=0,0,_x[2]!=0,0,_x[0]!=0,0}*/
+    "pslld $16,%%xmm8\n\t"
+    "psllw $2,%%xmm4\n\t"
+    /*xmm9={0,0,0,0,0,0,0,1}*/
+    "movd %[a],%%xmm9\n\t"
+    /*xmm8={0,0,0,0,_x[2]!=0,0,_x[0]!=0,0}*/
+    "pshufhw $0x00,%%xmm8,%%xmm8\n\t"
+    "psllw $2,%%xmm5\n\t"
+    /*%[a]={1}x2*/
+    "mov $0x10001,%[a]\n\t"
+    /*xmm8={0,0,0,0,0,0,0,_x[0]!=0}*/
+    "pshuflw $0x01,%%xmm8,%%xmm8\n\t"
+    "psllw $2,%%xmm6\n\t"
+    /*xmm10={0,0,0,0,0,0,1,1}*/
+    "movd %[a],%%xmm10\n\t"
+    /*xmm0=_x[7...0]+{0,0,0,0,0,0,0,_x[0]!=0}*/
+    "paddw %%xmm8,%%xmm0\n\t"
+    "psllw $2,%%xmm7\n\t"
+    /*xmm0=_x[7...0]+{0,0,0,0,0,0,1,(_x[0]!=0)+1}*/
+    "paddw %%xmm10,%%xmm0\n\t"
+    /*xmm1=_x[15...8]-{0,0,0,0,0,0,0,1}*/
+    "psubw %%xmm9,%%xmm1\n\t"
+    /*Transform columns.*/
+    OC_FDCT8x8
+    /*Transform rows.*/
+    OC_TRANSPOSE8x8
+    OC_FDCT8x8
+    /*TODO: zig-zag ordering?*/
+    OC_TRANSPOSE8x8
+    /*xmm14={-2}x8; round and drop the two extra bits: y=(y+2)>>2.*/
+    "paddw %%xmm14,%%xmm14\n\t"
+    "psubw %%xmm14,%%xmm0\n\t"
+    "psubw %%xmm14,%%xmm1\n\t"
+    "psraw $2,%%xmm0\n\t"
+    "psubw %%xmm14,%%xmm2\n\t"
+    "psraw $2,%%xmm1\n\t"
+    "psubw %%xmm14,%%xmm3\n\t"
+    "psraw $2,%%xmm2\n\t"
+    "psubw %%xmm14,%%xmm4\n\t"
+    "psraw $2,%%xmm3\n\t"
+    "psubw %%xmm14,%%xmm5\n\t"
+    "psraw $2,%%xmm4\n\t"
+    "psubw %%xmm14,%%xmm6\n\t"
+    "psraw $2,%%xmm5\n\t"
+    "psubw %%xmm14,%%xmm7\n\t"
+    "psraw $2,%%xmm6\n\t"
+    "psraw $2,%%xmm7\n\t"
+    /*Store the result.*/
+    "movdqa %%xmm0,0x00(%[y])\n\t"
+    "movdqa %%xmm1,0x10(%[y])\n\t"
+    "movdqa %%xmm2,0x20(%[y])\n\t"
+    "movdqa %%xmm3,0x30(%[y])\n\t"
+    "movdqa %%xmm4,0x40(%[y])\n\t"
+    "movdqa %%xmm5,0x50(%[y])\n\t"
+    "movdqa %%xmm6,0x60(%[y])\n\t"
+    "movdqa %%xmm7,0x70(%[y])\n\t"
+    :[a]"=&r"(a)
+    :[y]"r"(_y),[x]"r"(_x)
+    :"memory"
+  );
+}
+#endif

Copied: trunk/theora/lib/x86/x86enc.c (from rev 16442, trunk/theora/lib/enc/x86/x86enc.c)
===================================================================
--- trunk/theora/lib/x86/x86enc.c	                        (rev 0)
+++ trunk/theora/lib/x86/x86enc.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,49 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86state.c 15675 2009-02-06 09:43:27Z tterribe $
+
+ ********************************************************************/
+#include "x86enc.h"
+
+#if defined(OC_X86_ASM)
+
+#include "../cpu.c"
+
+void oc_enc_vtable_init_x86(oc_enc_ctx *_enc){
+  ogg_uint32_t flags;
+  /*Let oc_enc_vtable_init_c() fill the table, then override per CPU flag.*/
+  flags=oc_cpu_flags_get();
+  oc_enc_vtable_init_c(_enc);
+  if(flags&OC_CPU_X86_MMX){
+    _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_mmx;
+    _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx;
+    _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
+    _enc->opt_vtable.frag_sub=oc_enc_frag_sub_mmx;
+    _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx;
+  }
+  if(flags&OC_CPU_X86_MMXEXT){
+    _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext;
+    _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext;
+    _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext;
+    _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext;
+    _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext;
+    _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext;
+    _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext;
+  }
+# if defined(OC_X86_64_ASM)
+  /*The SSE2 fDCT is currently not installed; the disabled hookup was:
+    _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2; (if OC_CPU_X86_SSE2)*/
+# endif
+}
+#endif

Copied: trunk/theora/lib/x86/x86enc.h (from rev 16442, trunk/theora/lib/enc/x86/x86enc.h)
===================================================================
--- trunk/theora/lib/x86/x86enc.h	                        (rev 0)
+++ trunk/theora/lib/x86/x86enc.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,47 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86int.h 15675 2009-02-06 09:43:27Z tterribe $
+
+ ********************************************************************/
+
+#if !defined(_x86_x86enc_H)
+# define _x86_x86enc_H (1)
+# include "../encint.h"
+# include "x86int.h"
+
+void oc_enc_vtable_init_x86(oc_enc_ctx *_enc);
+/*Candidate routines for oc_enc_vtable_init_x86() to install, by CPU flag.*/
+unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride);
+void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64],
+ const unsigned char *_x,const unsigned char *_y,int _stride);
+void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64],
+ const unsigned char *_x,int _stride);
+void oc_enc_frag_copy2_mmxext(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+
+#endif

Modified: trunk/theora/lib/x86/x86int.h
===================================================================
--- trunk/theora/lib/dec/x86/x86int.h	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/lib/x86/x86int.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -17,7 +17,7 @@
 
 #if !defined(_x86_x86int_H)
 # define _x86_x86int_H (1)
-# include "../../internal.h"
+# include "../internal.h"
 
 void oc_state_vtable_init_x86(oc_theora_state *_state);
 

Modified: trunk/theora/lib/x86/x86state.c
===================================================================
--- trunk/theora/lib/dec/x86/x86state.c	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/lib/x86/x86state.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -19,7 +19,7 @@
 
 #if defined(OC_X86_ASM)
 
-#include "../../cpu.c"
+#include "../cpu.c"
 
 /*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into
    each quadrant of the destination.*/

Copied: trunk/theora/lib/x86_vc/mmxencfrag.c (from rev 16442, trunk/theora/lib/enc/x86_vc/mmxencfrag.c)
===================================================================
--- trunk/theora/lib/x86_vc/mmxencfrag.c	                        (rev 0)
+++ trunk/theora/lib/x86_vc/mmxencfrag.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,969 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: dsp_mmx.c 14579 2008-03-12 06:42:40Z xiphmont $
+
+ ********************************************************************/
+#include <stddef.h>
+#include "x86enc.h"
+
+#if defined(OC_X86_ASM)
+/*Returns the sum of absolute differences (SAD) of two 8x8 blocks of bytes.*/
+unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  ptrdiff_t ret;
+  __asm{
+#define SRC esi
+#define REF edx
+#define YSTRIDE ecx
+#define YSTRIDE3 ebx /*3*_ystride, set by the first lea below.*/
+    mov YSTRIDE,_ystride
+    mov SRC,_src
+    mov REF,_ref
+    /*Load the first 4 rows of each block.*/
+    movq mm0,[SRC]
+    movq mm1,[REF]
+    movq mm2,[SRC][YSTRIDE]
+    movq mm3,[REF][YSTRIDE]
+    lea YSTRIDE3,[YSTRIDE+YSTRIDE*2]
+    movq mm4,[SRC+YSTRIDE*2]
+    movq mm5,[REF+YSTRIDE*2]
+    movq mm6,[SRC+YSTRIDE3]
+    movq mm7,[REF+YSTRIDE3]
+    /*Compute their SADs and accumulate them in mm0.*/
+    psadbw mm0,mm1
+    psadbw mm2,mm3
+    lea SRC,[SRC+YSTRIDE*4]
+    paddw mm0,mm2
+    lea REF,[REF+YSTRIDE*4]
+    /*Load rows 4-6 as registers become available.*/
+    movq mm2,[SRC]
+    movq mm3,[REF]
+    psadbw mm4,mm5
+    psadbw mm6,mm7
+    paddw mm0,mm4
+    movq mm5,[REF+YSTRIDE]
+    movq mm4,[SRC+YSTRIDE]
+    paddw mm0,mm6
+    movq mm7,[REF+YSTRIDE*2]
+    movq mm6,[SRC+YSTRIDE*2]
+    /*Start adding their SADs to mm0.*/
+    psadbw mm2,mm3
+    psadbw mm4,mm5
+    paddw mm0,mm2
+    psadbw mm6,mm7
+    /*Load the last row (row 7) as registers become available.*/
+    movq mm2,[SRC+YSTRIDE3]
+    movq mm3,[REF+YSTRIDE3]
+    /*And finish adding up their SADs.*/
+    paddw mm0,mm4
+    psadbw mm2,mm3
+    paddw mm0,mm6
+    paddw mm0,mm2
+    movd [ret],mm0
+#undef SRC
+#undef REF
+#undef YSTRIDE
+#undef YSTRIDE3
+  }
+  return (unsigned)ret;
+}
+
+unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+  /*_thresh is ignored: the full SAD is always computed (no early exit).*/
+  return oc_enc_frag_sad_mmxext(_src,_ref,_ystride);
+}
+/*Adds the SAD of two rows against avg(ref1,ref2) to RET; preloads two more.*/
+#define OC_SAD2_LOOP __asm{ \
+  /*We want to compute ((mm0+mm1)>>1) on unsigned bytes without overflow, but \
+     pavgb computes ((mm0+mm1+1)>>1). \
+   The latter is exactly 1 too large when the low bit of two corresponding \
+    bytes is only set in one of them. \
+   Therefore we pxor the operands, pand to mask out the low bits, and psubb to \
+    correct the output of pavgb.*/ \
+  __asm  movq mm6,mm0 \
+  __asm  lea REF1,[REF1+YSTRIDE*2] \
+  __asm  pxor mm0,mm1 \
+  __asm  pavgb mm6,mm1 \
+  __asm  lea REF2,[REF2+YSTRIDE*2] \
+  __asm  movq mm1,mm2 \
+  __asm  pand mm0,mm7 \
+  __asm  pavgb mm2,mm3 \
+  __asm  pxor mm1,mm3 \
+  __asm  movq mm3,[REF2+YSTRIDE] \
+  __asm  psubb mm6,mm0 \
+  __asm  movq mm0,[REF1] \
+  __asm  pand mm1,mm7 \
+  __asm  psadbw mm4,mm6 \
+  __asm  movd mm6,RET \
+  __asm  psubb mm2,mm1 \
+  __asm  movq mm1,[REF2] \
+  __asm  lea SRC,[SRC+YSTRIDE*2] \
+  __asm  psadbw mm5,mm2 \
+  __asm  movq mm2,[REF1+YSTRIDE] \
+  __asm  paddw mm5,mm4 \
+  __asm  movq mm4,[SRC] \
+  __asm  paddw mm6,mm5 \
+  __asm  movq mm5,[SRC+YSTRIDE] \
+  __asm  movd RET,mm6 \
+}
+
+/*Same as OC_SAD2_LOOP, but does not pre-load the next two rows.*/
+#define OC_SAD2_TAIL __asm{ \
+  __asm  movq mm6,mm0 \
+  __asm  pavgb mm0,mm1 \
+  __asm  pxor mm6,mm1 \
+  __asm  movq mm1,mm2 \
+  __asm  pand mm6,mm7 \
+  __asm  pavgb mm2,mm3 \
+  __asm  pxor mm1,mm3 \
+  __asm  psubb mm0,mm6 \
+  __asm  pand mm1,mm7 \
+  __asm  psadbw mm4,mm0 \
+  __asm  psubb mm2,mm1 \
+  __asm  movd mm6,RET \
+  __asm  psadbw mm5,mm2 \
+  __asm  paddw mm5,mm4 \
+  __asm  paddw mm6,mm5 \
+  __asm  movd RET,mm6 \
+}
+/*Returns the 8x8 SAD of _src against the average of _ref1 and _ref2.*/
+unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){ /*_thresh is not used by this implementation.*/
+  ptrdiff_t ret;
+  __asm{
+#define REF1 ecx
+#define REF2 edi
+#define YSTRIDE esi
+#define SRC edx
+#define RET eax /*Running SAD total, zeroed by the xor below.*/
+    mov YSTRIDE,_ystride
+    mov SRC,_src
+    mov REF1,_ref1
+    mov REF2,_ref2
+    movq mm0,[REF1]
+    movq mm1,[REF2]
+    movq mm2,[REF1+YSTRIDE]
+    movq mm3,[REF2+YSTRIDE]
+    xor RET,RET
+    movq mm4,[SRC]
+    pxor mm7,mm7
+    pcmpeqb mm6,mm6
+    movq mm5,[SRC+YSTRIDE]
+    psubb mm7,mm6 /*mm7=0-(-1)={1}x8, the low-bit mask the macros pand with.*/
+    OC_SAD2_LOOP
+    OC_SAD2_LOOP
+    OC_SAD2_LOOP
+    OC_SAD2_TAIL
+    mov [ret],RET
+#undef REF1
+#undef REF2
+#undef YSTRIDE
+#undef SRC
+#undef RET
+  }
+  return (unsigned)ret;
+}
+
+/*Load 8 rows of 4 pixels from SRC and REF (byte offset _off) and compute their
+  16-bit differences in mm0...mm7; SRC, REF and both strides are restored.*/
+#define OC_LOAD_SUB_8x4(_off) __asm{ \
+  __asm  movd mm0,[_off+SRC] \
+  __asm  movd mm4,[_off+REF] \
+  __asm  movd mm1,[_off+SRC+SRC_YSTRIDE] \
+  __asm  lea SRC,[SRC+SRC_YSTRIDE*2] \
+  __asm  movd mm5,[_off+REF+REF_YSTRIDE] \
+  __asm  lea REF,[REF+REF_YSTRIDE*2] \
+  __asm  movd mm2,[_off+SRC] \
+  __asm  movd mm7,[_off+REF] \
+  __asm  movd mm3,[_off+SRC+SRC_YSTRIDE] \
+  __asm  movd mm6,[_off+REF+REF_YSTRIDE] \
+  __asm  punpcklbw mm0,mm4 \
+  __asm  lea SRC,[SRC+SRC_YSTRIDE*2] \
+  __asm  punpcklbw mm4,mm4 \
+  __asm  lea REF,[REF+REF_YSTRIDE*2] \
+  __asm  psubw mm0,mm4 \
+  __asm  movd mm4,[_off+SRC] \
+  __asm  movq [_off*2+BUF],mm0 /*Spill row 0; it is reloaded at the end.*/ \
+  __asm  movd mm0,[_off+REF] \
+  __asm  punpcklbw mm1,mm5 \
+  __asm  punpcklbw mm5,mm5 \
+  __asm  psubw mm1,mm5 \
+  __asm  movd mm5,[_off+SRC+SRC_YSTRIDE] \
+  __asm  punpcklbw mm2,mm7 \
+  __asm  punpcklbw mm7,mm7 \
+  __asm  psubw mm2,mm7 \
+  __asm  movd mm7,[_off+REF+REF_YSTRIDE] \
+  __asm  punpcklbw mm3,mm6 \
+  __asm  lea SRC,[SRC+SRC_YSTRIDE*2] \
+  __asm  punpcklbw mm6,mm6 \
+  __asm  psubw mm3,mm6 \
+  __asm  movd mm6,[_off+SRC] \
+  __asm  punpcklbw mm4,mm0 \
+  __asm  lea REF,[REF+REF_YSTRIDE*2] \
+  __asm  punpcklbw mm0,mm0 \
+  __asm  lea SRC,[SRC+SRC_YSTRIDE*2] \
+  __asm  psubw mm4,mm0 \
+  __asm  movd mm0,[_off+REF] \
+  __asm  punpcklbw mm5,mm7 \
+  __asm  neg SRC_YSTRIDE \
+  __asm  punpcklbw mm7,mm7 \
+  __asm  psubw mm5,mm7 \
+  __asm  movd mm7,[_off+SRC+SRC_YSTRIDE] \
+  __asm  punpcklbw mm6,mm0 \
+  __asm  lea REF,[REF+REF_YSTRIDE*2] \
+  __asm  punpcklbw mm0,mm0 \
+  __asm  neg REF_YSTRIDE \
+  __asm  psubw mm6,mm0 \
+  __asm  movd mm0,[_off+REF+REF_YSTRIDE] \
+  __asm  lea SRC,[SRC+SRC_YSTRIDE*8] \
+  __asm  punpcklbw mm7,mm0 \
+  __asm  neg SRC_YSTRIDE \
+  __asm  punpcklbw mm0,mm0 \
+  __asm  lea REF,[REF+REF_YSTRIDE*8] \
+  __asm  psubw mm7,mm0 \
+  __asm  neg REF_YSTRIDE \
+  __asm  movq mm0,[_off*2+BUF] \
+}
+
+/*Load 4x4 pixels from each of SRC and SRC4, zero-extended, into mm0...mm7.*/
+#define OC_LOAD_8x4(_off) __asm{ \
+  __asm  movd mm0,[_off+SRC] \
+  __asm  movd mm1,[_off+SRC+YSTRIDE] \
+  __asm  movd mm2,[_off+SRC+YSTRIDE*2] \
+  __asm  pxor mm7,mm7 \
+  __asm  movd mm3,[_off+SRC+YSTRIDE3] \
+  __asm  punpcklbw mm0,mm7 \
+  __asm  movd mm4,[_off+SRC4] \
+  __asm  punpcklbw mm1,mm7 \
+  __asm  movd mm5,[_off+SRC4+YSTRIDE] \
+  __asm  punpcklbw mm2,mm7 \
+  __asm  movd mm6,[_off+SRC4+YSTRIDE*2] \
+  __asm  punpcklbw mm3,mm7 \
+  __asm  movd mm7,[_off+SRC4+YSTRIDE3] \
+  __asm  punpcklbw mm4,mm4 \
+  __asm  punpcklbw mm5,mm5 \
+  __asm  psrlw mm4,8 \
+  __asm  psrlw mm5,8 \
+  __asm  punpcklbw mm6,mm6 \
+  __asm  punpcklbw mm7,mm7 \
+  __asm  psrlw mm6,8 \
+  __asm  psrlw mm7,8 \
+}
+
+/*Performs the first two stages of an 8-point 1-D Hadamard transform on each
+   of the four 16-bit lanes of mm0...mm7, in place, except that outputs 0-3
+   are swapped with outputs 4-7.
+  Outputs 2, 3, 6 and 7 from the second stage are negated (which allows us to
+   perform this stage in place with no temporary registers).*/
+#define OC_HADAMARD_AB_8x4 __asm{ \
+  /*Stage A: \
+    Outputs 0-3 are swapped with 4-7 here.*/ \
+  __asm  paddw mm5,mm1 \
+  __asm  paddw mm6,mm2 \
+  __asm  paddw mm1,mm1 \
+  __asm  paddw mm2,mm2 \
+  __asm  psubw mm1,mm5 \
+  __asm  psubw mm2,mm6 \
+  __asm  paddw mm7,mm3 \
+  __asm  paddw mm4,mm0 \
+  __asm  paddw mm3,mm3 \
+  __asm  paddw mm0,mm0 \
+  __asm  psubw mm3,mm7 \
+  __asm  psubw mm0,mm4 \
+   /*Stage B:*/ \
+  __asm  paddw mm0,mm2 \
+  __asm  paddw mm1,mm3 \
+  __asm  paddw mm4,mm6 \
+  __asm  paddw mm5,mm7 \
+  __asm  paddw mm2,mm2 \
+  __asm  paddw mm3,mm3 \
+  __asm  paddw mm6,mm6 \
+  __asm  paddw mm7,mm7 \
+  __asm  psubw mm2,mm0 \
+  __asm  psubw mm3,mm1 \
+  __asm  psubw mm6,mm4 \
+  __asm  psubw mm7,mm5 \
+}
+
+/*Performs the last stage of an 8-point 1-D Hadamard transform in place.
+  Outputs 1, 3, 5, and 7 are negated (which allows us to perform this stage in
+   place with no temporary registers).*/
+#define OC_HADAMARD_C_8x4 __asm{ \
+  /*Stage C:*/ \
+  __asm  paddw mm0,mm1 \
+  __asm  paddw mm2,mm3 \
+  __asm  paddw mm4,mm5 \
+  __asm  paddw mm6,mm7 \
+  __asm  paddw mm1,mm1 \
+  __asm  paddw mm3,mm3 \
+  __asm  paddw mm5,mm5 \
+  __asm  paddw mm7,mm7 \
+  __asm  psubw mm1,mm0 \
+  __asm  psubw mm3,mm2 \
+  __asm  psubw mm5,mm4 \
+  __asm  psubw mm7,mm6 \
+}
+
+/*Performs an 8-point 1-D Hadamard transform.
+  The transform is performed in place, except that outputs 0-3 are swapped with
+   outputs 4-7.
+  Outputs 1, 2, 5 and 6 are negated: stage B negates 2, 3, 6 and 7, stage C
+   negates 1, 3, 5 and 7, and the double negation of 3 and 7 cancels.*/
+#define OC_HADAMARD_8x4 __asm{ \
+  OC_HADAMARD_AB_8x4 \
+  OC_HADAMARD_C_8x4 \
+}
+
+/*Performs the first part of the final Hadamard stage and the summing of
+   absolute values; mm6 and mm7 are spilled to BUF at offsets _r6 and _r7.
+  At the end of this part, mm1 will contain the DC coefficient of the
+   transform.*/
+#define OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) __asm{ \
+  /*We use the fact that \
+      (abs(a+b)+abs(a-b))/2=max(abs(a),abs(b)) \
+     to merge the final butterfly with the abs and the first stage of \
+     accumulation. \
+    Thus we can avoid using pabsw, which is not available until SSSE3. \
+    Emulating pabsw takes 3 instructions, so the straightforward MMXEXT \
+     implementation would be (3+3)*8+7=55 instructions (+4 for spilling \
+     registers). \
+    Even with pabsw, it would be (3+1)*8+7=39 instructions (with no spills). \
+    This implementation is only 26 (+4 for spilling registers).*/ \
+  __asm  movq [_r7+BUF],mm7 \
+  __asm  movq [_r6+BUF],mm6 \
+  /*mm7={0x7FFF}x4 \
+    mm0=max(abs(mm0),abs(mm1))-0x7FFF*/ \
+  __asm  pcmpeqb mm7,mm7 \
+  __asm  movq mm6,mm0 \
+  __asm  psrlw mm7,1 \
+  __asm  paddw mm6,mm1 \
+  __asm  pmaxsw mm0,mm1 \
+  __asm  paddsw mm6,mm7 \
+  __asm  psubw mm0,mm6 \
+  /*mm2=max(abs(mm2),abs(mm3))-0x7FFF \
+    mm4=max(abs(mm4),abs(mm5))-0x7FFF*/ \
+  __asm  movq mm6,mm2 \
+  __asm  movq mm1,mm4 \
+  __asm  pmaxsw mm2,mm3 \
+  __asm  pmaxsw mm4,mm5 \
+  __asm  paddw mm6,mm3 \
+  __asm  paddw mm1,mm5 \
+  __asm  movq mm3,[_r7+BUF] \
+}
+
+/*Performs the second part of the final Hadamard stage and the summing of
+   absolute values into mm0; the value spilled at _r6+BUF is reloaded here.*/
+#define OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) __asm{ \
+  __asm  paddsw mm6,mm7 \
+  __asm  movq mm5,[_r6+BUF] \
+  __asm  paddsw mm1,mm7 \
+  __asm  psubw mm2,mm6 \
+  __asm  psubw mm4,mm1 \
+  /*mm7={1}x4 (needed for the horizontal add that follows) \
+    mm0+=mm2+mm4+max(abs(mm3),abs(mm5))-0x7FFF*/ \
+  __asm  movq mm6,mm3 \
+  __asm  pmaxsw mm3,mm5 \
+  __asm  paddw mm0,mm2 \
+  __asm  paddw mm6,mm5 \
+  __asm  paddw mm0,mm4 \
+  __asm  paddsw mm6,mm7 \
+  __asm  paddw mm0,mm3 \
+  __asm  psrlw mm7,14 \
+  __asm  psubw mm0,mm6 \
+}
+
+/*Performs the last stage of an 8-point 1-D Hadamard transform, takes the
+   absolute value of each component, and accumulates everything into mm0,
+   using BUF offsets _r6 and _r7 as spill slots for mm6 and mm7.
+  This is the only portion of SATD which requires MMXEXT (plain MMX would
+   need 4 instructions and an extra register to emulate pmaxsw).*/
+#define OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) __asm{ \
+  OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \
+  OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \
+}
+
+/*Performs an 8-point 1-D Hadamard transform, takes the absolute value of each
+   component, and accumulates everything into mm0 (_r6,_r7: spill offsets).
+  Note that mm0 will have an extra 4 added to each column, and that after
+   removing this value, the remainder will be half the conventional value.*/
+#define OC_HADAMARD_ABS_ACCUM_8x4(_r6,_r7) __asm{ \
+  OC_HADAMARD_AB_8x4 \
+  OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) \
+}
+
+/*Performs two 4x4 transposes (mostly) in place.
+  On input, {mm0,mm1,mm2,mm3} contains rows {e,f,g,h}, and {mm4,mm5,mm6,mm7}
+   contains rows {a,b,c,d}; 0x10+_off+BUF is used as scratch space for mm5.
+  On output, {0x40,0x50,0x60,0x70}+_off+BUF contains {e,f,g,h}^T, and
+   {mm4,mm5,mm6,mm7} contains the transposed rows {a,b,c,d}^T.*/
+#define OC_TRANSPOSE_4x4x2(_off) __asm{ \
+  /*First 4x4 transpose:*/ \
+  __asm  movq [0x10+_off+BUF],mm5 \
+  /*mm0 = e3 e2 e1 e0 \
+    mm1 = f3 f2 f1 f0 \
+    mm2 = g3 g2 g1 g0 \
+    mm3 = h3 h2 h1 h0*/ \
+  __asm  movq mm5,mm2 \
+  __asm  punpcklwd mm2,mm3 \
+  __asm  punpckhwd mm5,mm3 \
+  __asm  movq mm3,mm0 \
+  __asm  punpcklwd mm0,mm1 \
+  __asm  punpckhwd mm3,mm1 \
+  /*mm0 = f1 e1 f0 e0 \
+    mm3 = f3 e3 f2 e2 \
+    mm2 = h1 g1 h0 g0 \
+    mm5 = h3 g3 h2 g2*/ \
+  __asm  movq mm1,mm0 \
+  __asm  punpckldq mm0,mm2 \
+  __asm  punpckhdq mm1,mm2 \
+  __asm  movq mm2,mm3 \
+  __asm  punpckhdq mm3,mm5 \
+  __asm  movq [0x40+_off+BUF],mm0 \
+  __asm  punpckldq mm2,mm5 \
+  /*mm0 = h0 g0 f0 e0 \
+    mm1 = h1 g1 f1 e1 \
+    mm2 = h2 g2 f2 e2 \
+    mm3 = h3 g3 f3 e3*/ \
+  __asm  movq mm5,[0x10+_off+BUF] \
+  /*Second 4x4 transpose:*/ \
+  /*mm4 = a3 a2 a1 a0 \
+    mm5 = b3 b2 b1 b0 \
+    mm6 = c3 c2 c1 c0 \
+    mm7 = d3 d2 d1 d0*/ \
+  __asm  movq mm0,mm6 \
+  __asm  punpcklwd mm6,mm7 \
+  __asm  movq [0x50+_off+BUF],mm1 \
+  __asm  punpckhwd mm0,mm7 \
+  __asm  movq mm7,mm4 \
+  __asm  punpcklwd mm4,mm5 \
+  __asm  movq [0x60+_off+BUF],mm2 \
+  __asm  punpckhwd mm7,mm5 \
+  /*mm4 = b1 a1 b0 a0 \
+    mm7 = b3 a3 b2 a2 \
+    mm6 = d1 c1 d0 c0 \
+    mm0 = d3 c3 d2 c2*/ \
+  __asm  movq mm5,mm4 \
+  __asm  punpckldq mm4,mm6 \
+  __asm  movq [0x70+_off+BUF],mm3 \
+  __asm  punpckhdq mm5,mm6 \
+  __asm  movq mm6,mm7 \
+  __asm  punpckhdq mm7,mm0 \
+  __asm  punpckldq mm6,mm0 \
+  /*mm4 = d0 c0 b0 a0 \
+    mm5 = d1 c1 b1 a1 \
+    mm6 = d2 c2 b2 a2 \
+    mm7 = d3 c3 b3 a3*/ \
+}
+/*8x8 SATD of _src-_ref; exits early with a partial sum if it reaches _thresh.*/
+static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src,
+ int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){
+  OC_ALIGN8(ogg_int16_t  buf[64]);
+  ogg_int16_t           *bufp;
+  unsigned               ret1;
+  unsigned               ret2; /*NOTE(review): never referenced below.*/
+  bufp=buf;
+  __asm{
+#define SRC esi
+#define REF eax
+#define SRC_YSTRIDE ecx
+#define REF_YSTRIDE edx
+#define BUF edi
+#define RET eax /*Shares eax with REF; only written after the last load.*/
+#define RET2 edx /*Shares edx with REF_YSTRIDE; written after the last load.*/
+    mov SRC,_src
+    mov SRC_YSTRIDE,_src_ystride
+    mov REF,_ref
+    mov REF_YSTRIDE,_ref_ystride
+    mov BUF,bufp
+    OC_LOAD_SUB_8x4(0x00)
+    OC_HADAMARD_8x4
+    OC_TRANSPOSE_4x4x2(0x00)
+    /*Finish swapping out this 8x4 block to make room for the next one.
+      mm0...mm3 have been swapped out already.*/
+    movq [0x00+BUF],mm4
+    movq [0x10+BUF],mm5
+    movq [0x20+BUF],mm6
+    movq [0x30+BUF],mm7
+    OC_LOAD_SUB_8x4(0x04)
+    OC_HADAMARD_8x4
+    OC_TRANSPOSE_4x4x2(0x08)
+    /*Here the first 4x4 block of output from the last transpose is the second
+       4x4 block of input for the next transform.
+      We have cleverly arranged that it already be in the appropriate place, so
+       we only have to do half the loads.*/
+    movq mm1,[0x10+BUF]
+    movq mm2,[0x20+BUF]
+    movq mm3,[0x30+BUF]
+    movq mm0,[0x00+BUF]
+    OC_HADAMARD_ABS_ACCUM_8x4(0x28,0x38)
+    /*Up to this point, everything fit in 16 bits (8 input + 1 for the
+       difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
+       for the factor of two we dropped + 3 for the vertical accumulation).
+      Now we finally have to promote things to dwords.
+      We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long
+       latency of pmaddwd by starting the next series of loads now.*/
+    mov RET2,_thresh
+    pmaddwd mm0,mm7
+    movq mm1,[0x50+BUF]
+    movq mm5,[0x58+BUF]
+    movq mm4,mm0
+    movq mm2,[0x60+BUF]
+    punpckhdq mm0,mm0
+    movq mm6,[0x68+BUF]
+    paddd mm4,mm0
+    movq mm3,[0x70+BUF]
+    movd RET,mm4
+    movq mm7,[0x78+BUF]
+    /*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4
+       added to them, and a factor of two removed; correct the final sum here.*/
+    lea RET,[RET+RET-32]
+    movq mm0,[0x40+BUF]
+    cmp RET,RET2
+    movq mm4,[0x48+BUF]
+    jae at_end
+    OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78)
+    pmaddwd mm0,mm7
+    /*There isn't much to stick in here to hide the latency this time, but the
+       alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose
+       latency is even worse.*/
+    sub RET,32
+    movq mm4,mm0
+    punpckhdq mm0,mm0
+    paddd mm4,mm0
+    movd RET2,mm4
+    lea RET,[RET+RET2*2]
+    align 16
+at_end:
+    mov ret1,RET
+#undef SRC
+#undef REF
+#undef SRC_YSTRIDE
+#undef REF_YSTRIDE
+#undef BUF
+#undef RET
+#undef RET2
+  }
+  return ret1;
+}
+/*SATD of _src against _ref where both blocks share the row stride _ystride.*/
+unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+  return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh);
+}
+
+
+/*Writes the 8x8 truncated average ((a+b)>>1) of _src1 and _src2 to _dst; it
+   takes separate strides so oc_enc_frag_satd2_thresh_mmxext() can share it.*/
+static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){
+  __asm{
+    /*Load the first 3 rows.*/
+#define DST_YSTRIDE ebx
+#define SRC_YSTRIDE esi
+#define DST eax
+#define SRC1 edx
+#define SRC2 ecx
+    mov DST_YSTRIDE,_dst_ystride
+    mov SRC_YSTRIDE,_src_ystride
+    mov DST,_dst
+    mov SRC1,_src1
+    mov SRC2,_src2
+    movq mm0,[SRC1]
+    movq mm1,[SRC2]
+    movq mm2,[SRC1+SRC_YSTRIDE]
+    lea SRC1,[SRC1+SRC_YSTRIDE*2]
+    movq mm3,[SRC2+SRC_YSTRIDE]
+    lea SRC2,[SRC2+SRC_YSTRIDE*2]
+    pxor mm7,mm7
+    movq mm4,[SRC1]
+    pcmpeqb mm6,mm6
+    movq mm5,[SRC2]
+    /*mm7={1}x8.*/
+    psubb mm7,mm6
+    /*Start averaging mm0 and mm1 into mm6.*/
+    movq mm6,mm0
+    pxor mm0,mm1
+    pavgb mm6,mm1
+    /*mm1 is free, start averaging mm3 into mm2 using mm1.*/
+    movq mm1,mm2
+    pand mm0,mm7
+    pavgb mm2,mm3
+    pxor mm1,mm3
+    /*mm3 is free.*/
+    psubb mm6,mm0
+    /*mm0 is free, start loading the next row.*/
+    movq mm0,[SRC1+SRC_YSTRIDE]
+    /*Start averaging mm5 and mm4 using mm3.*/
+    movq mm3,mm4
+    /*mm6 [row 0] is done; write it out.*/
+    movq [DST],mm6
+    pand mm1,mm7
+    pavgb mm4,mm5
+    psubb mm2,mm1
+    /*mm1 is free, continue loading the next row.*/
+    movq mm1,[SRC2+SRC_YSTRIDE]
+    pxor mm3,mm5
+    lea SRC1,[SRC1+SRC_YSTRIDE*2]
+    /*mm2 [row 1] is done; write it out.*/
+    movq [DST+DST_YSTRIDE],mm2
+    pand mm3,mm7
+    /*Start loading the next row.*/
+    movq mm2,[SRC1]
+    lea DST,[DST+DST_YSTRIDE*2]
+    psubb mm4,mm3
+    lea SRC2,[SRC2+SRC_YSTRIDE*2]
+    /*mm4 [row 2] is done; write it out.*/
+    movq [DST],mm4
+    /*Continue loading the next row.*/
+    movq mm3,[SRC2]
+    /*Start averaging mm0 and mm1 into mm6.*/
+    movq mm6,mm0
+    pxor mm0,mm1
+    /*Start loading the next row.*/
+    movq mm4,[SRC1+SRC_YSTRIDE]
+    pavgb mm6,mm1
+    /*mm1 is free; start averaging mm3 into mm2 using mm1.*/
+    movq mm1,mm2
+    pand mm0,mm7
+    /*Continue loading the next row.*/
+    movq mm5,[SRC2+SRC_YSTRIDE]
+    pavgb mm2,mm3
+    lea SRC1,[SRC1+SRC_YSTRIDE*2]
+    pxor mm1,mm3
+    /*mm3 is free.*/
+    psubb mm6,mm0
+    /*mm0 is free, start loading the next row.*/
+    movq mm0,[SRC1]
+    /*Start averaging mm5 into mm4 using mm3.*/
+    movq mm3,mm4
+    /*mm6 [row 3] is done; write it out.*/
+    movq [DST+DST_YSTRIDE],mm6
+    pand mm1,mm7
+    lea SRC2,[SRC2+SRC_YSTRIDE*2]
+    pavgb mm4,mm5
+    lea DST,[DST+DST_YSTRIDE*2]
+    psubb mm2,mm1
+    /*mm1 is free; continue loading the next row.*/
+    movq mm1,[SRC2]
+    pxor mm3,mm5
+    /*mm2 [row 4] is done; write it out.*/
+    movq [DST],mm2
+    pand mm3,mm7
+    /*Start loading the next row.*/
+    movq mm2,[SRC1+SRC_YSTRIDE]
+    psubb mm4,mm3
+    /*Start averaging mm0 and mm1 into mm6.*/
+    movq mm6,mm0
+    /*Continue loading the next row.*/
+    movq mm3,[SRC2+SRC_YSTRIDE]
+    /*mm4 [row 5] is done; write it out.*/
+    movq [DST+DST_YSTRIDE],mm4
+    pxor mm0,mm1
+    pavgb mm6,mm1
+    /*mm4 is free; start averaging mm3 into mm2 using mm4.*/
+    movq mm4,mm2
+    pand mm0,mm7
+    pavgb mm2,mm3
+    pxor mm4,mm3
+    lea DST,[DST+DST_YSTRIDE*2]
+    psubb mm6,mm0
+    pand mm4,mm7
+    /*mm6 [row 6] is done, write it out.*/
+    movq [DST],mm6
+    psubb mm2,mm4
+    /*mm2 [row 7] is done, write it out.*/
+    movq [DST+DST_YSTRIDE],mm2
+#undef SRC1
+#undef SRC2
+#undef SRC_YSTRIDE
+#undef DST_YSTRIDE
+#undef DST
+  }
+}
+/*SATD of _src against the average of _ref1 and _ref2 (built in a local 8x8).*/
+unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){
+  OC_ALIGN8(unsigned char ref[64]);
+  oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride);
+  return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh);
+}
+/*8x8 SATD of the block at _src (no reference), with the DC term subtracted.*/
+unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,
+ int _ystride){
+  OC_ALIGN8(ogg_int16_t  buf[64]);
+  ogg_int16_t           *bufp;
+  unsigned               ret1;
+  unsigned               ret2; /*NOTE(review): never referenced below.*/
+  bufp=buf;
+  __asm{
+#define SRC eax
+#define SRC4 esi
+#define BUF edi
+#define RET eax /*Shares eax with SRC; only written after the last load.*/
+#define RET_WORD ax
+#define RET2 ecx /*Shares ecx with YSTRIDE3; written after the last load.*/
+#define YSTRIDE edx
+#define YSTRIDE3 ecx
+    mov SRC,_src
+    mov BUF,bufp
+    mov YSTRIDE,_ystride
+    /* src4 = src+4*ystride */
+    lea SRC4,[SRC+YSTRIDE*4]
+    /* ystride3 = 3*ystride */
+    lea YSTRIDE3,[YSTRIDE+YSTRIDE*2]
+    OC_LOAD_8x4(0x00)
+    OC_HADAMARD_8x4
+    OC_TRANSPOSE_4x4x2(0x00)
+    /*Finish swapping out this 8x4 block to make room for the next one.
+      mm0...mm3 have been swapped out already.*/
+    movq [0x00+BUF],mm4
+    movq [0x10+BUF],mm5
+    movq [0x20+BUF],mm6
+    movq [0x30+BUF],mm7
+    OC_LOAD_8x4(0x04)
+    OC_HADAMARD_8x4
+    OC_TRANSPOSE_4x4x2(0x08)
+    /*Here the first 4x4 block of output from the last transpose is the second
+      4x4 block of input for the next transform.
+      We have cleverly arranged that it already be in the appropriate place, so
+      we only have to do half the loads.*/
+    movq mm1,[0x10+BUF]
+    movq mm2,[0x20+BUF]
+    movq mm3,[0x30+BUF]
+    movq mm0,[0x00+BUF]
+    /*We split out the stages here so we can save the DC coefficient in the
+      middle.*/
+    OC_HADAMARD_AB_8x4
+    OC_HADAMARD_C_ABS_ACCUM_A_8x4(0x28,0x38)
+    movd RET,mm1
+    OC_HADAMARD_C_ABS_ACCUM_B_8x4(0x28,0x38)
+    /*Up to this point, everything fit in 16 bits (8 input + 1 for the
+      difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
+      for the factor of two we dropped + 3 for the vertical accumulation).
+      Now we finally have to promote things to dwords.
+      We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long
+      latency of pmaddwd by starting the next series of loads now.*/
+    pmaddwd mm0,mm7
+    movq mm1,[0x50+BUF]
+    movq mm5,[0x58+BUF]
+    movq mm2,[0x60+BUF]
+    movq mm4,mm0
+    movq mm6,[0x68+BUF]
+    punpckhdq mm0,mm0
+    movq mm3,[0x70+BUF]
+    paddd mm4,mm0
+    movq mm7,[0x78+BUF]
+    movd RET2,mm4
+    movq mm0,[0x40+BUF]
+    movq mm4,[0x48+BUF]
+    OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78)
+    pmaddwd mm0,mm7
+    /*We assume that the DC coefficient is always positive (which is true,
+    because the input to the INTRA transform was not a difference).*/
+    movzx RET,RET_WORD
+    add RET2,RET2
+    sub RET2,RET
+    movq mm4,mm0
+    punpckhdq mm0,mm0
+    paddd mm4,mm0
+    movd RET,mm4
+    lea RET,[-64+RET2+RET*2]
+    mov [ret1],RET
+#undef SRC
+#undef SRC4
+#undef BUF
+#undef RET
+#undef RET_WORD
+#undef RET2
+#undef YSTRIDE
+#undef YSTRIDE3
+  }
+  return ret1;
+}
+/*Stores the 8x8 16-bit difference _src-_ref in _residue, two rows per pass.*/
+void oc_enc_frag_sub_mmx(ogg_int16_t _residue[64],
+ const unsigned char *_src, const unsigned char *_ref,int _ystride){
+  int i;
+  __asm  pxor mm7,mm7
+  for(i=4;i-->0;){
+    __asm{
+#define SRC edx
+#define YSTRIDE ebx
+#define RESIDUE eax
+#define REF ecx
+      mov YSTRIDE,_ystride
+      mov RESIDUE,_residue
+      mov SRC,_src
+      mov REF,_ref
+      /*mm0=[src]*/
+      movq mm0,[SRC]
+      /*mm1=[ref]*/
+      movq mm1,[REF]
+      /*mm4=[src+ystride]*/
+      movq mm4,[SRC+YSTRIDE]
+      /*mm5=[ref+ystride]*/
+      movq mm5,[REF+YSTRIDE]
+      /*Compute [src]-[ref].*/
+      movq mm2,mm0
+      punpcklbw mm0,mm7
+      movq mm3,mm1
+      punpckhbw mm2,mm7
+      punpcklbw mm1,mm7
+      punpckhbw mm3,mm7
+      psubw mm0,mm1
+      psubw mm2,mm3
+      /*Compute [src+ystride]-[ref+ystride].*/
+      movq mm1,mm4
+      punpcklbw mm4,mm7
+      movq mm3,mm5
+      punpckhbw mm1,mm7
+      lea SRC,[SRC+YSTRIDE*2]
+      punpcklbw mm5,mm7
+      lea REF,[REF+YSTRIDE*2]
+      punpckhbw mm3,mm7
+      psubw mm4,mm5
+      psubw mm1,mm3
+      /*Write the answer out.*/
+      movq [RESIDUE+0x00],mm0
+      movq [RESIDUE+0x08],mm2
+      movq [RESIDUE+0x10],mm4
+      movq [RESIDUE+0x18],mm1
+      lea RESIDUE,[RESIDUE+0x20]
+      mov _residue,RESIDUE /*Save the advanced pointers for the next pass.*/
+      mov _src,SRC
+      mov _ref,REF
+#undef SRC
+#undef YSTRIDE
+#undef RESIDUE
+#undef REF
+    }
+  }
+}
+/*Stores the 8x8 block at _src minus 128, widened to 16 bits, in _residue.*/
+void oc_enc_frag_sub_128_mmx(ogg_int16_t _residue[64],
+ const unsigned char *_src,int _ystride){
+   __asm{
+#define YSTRIDE edx
+#define YSTRIDE3 ebx
+#define RESIDUE ecx
+#define SRC eax
+    mov YSTRIDE,_ystride
+    mov RESIDUE,_residue
+    mov SRC,_src
+    /*mm0=[src]*/
+    movq mm0,[SRC]
+    /*mm1=[src+ystride]*/
+    movq mm1,[SRC+YSTRIDE]
+    /*mm6={-1}x4*/
+    pcmpeqw mm6,mm6
+    /*mm2=[src+2*ystride]*/
+    movq mm2,[SRC+YSTRIDE*2]
+    /*[ystride3]=3*[ystride]*/
+    lea YSTRIDE3,[YSTRIDE+YSTRIDE*2]
+    /*mm6={0x8000}x4*/
+    psllw mm6,15
+    /*mm3=[src+3*ystride]*/
+    movq mm3,[SRC+YSTRIDE3]
+    /*mm6={128}x4*/
+    psrlw mm6,8
+    /*mm7=0*/ 
+    pxor mm7,mm7
+    /*[src]=[src]+4*[ystride]*/
+    lea SRC,[SRC+YSTRIDE*4]
+    /*Compute [src]-128 and [src+ystride]-128*/
+    movq mm4,mm0
+    punpcklbw mm0,mm7
+    movq mm5,mm1
+    punpckhbw mm4,mm7
+    psubw mm0,mm6
+    punpcklbw mm1,mm7
+    psubw mm4,mm6
+    punpckhbw mm5,mm7
+    psubw mm1,mm6
+    psubw mm5,mm6
+    /*Write the answer out.*/
+    movq [RESIDUE+0x00],mm0
+    movq [RESIDUE+0x08],mm4
+    movq [RESIDUE+0x10],mm1
+    movq [RESIDUE+0x18],mm5
+    /*mm0=[src+4*ystride]*/
+    movq mm0,[SRC]
+    /*mm1=[src+5*ystride]*/
+    movq mm1,[SRC+YSTRIDE]
+    /*Compute [src+2*ystride]-128 and [src+3*ystride]-128*/
+    movq mm4,mm2
+    punpcklbw mm2,mm7
+    movq mm5,mm3
+    punpckhbw mm4,mm7
+    psubw mm2,mm6
+    punpcklbw mm3,mm7
+    psubw mm4,mm6
+    punpckhbw mm5,mm7
+    psubw mm3,mm6
+    psubw mm5,mm6
+    /*Write the answer out.*/
+    movq [RESIDUE+0x20],mm2
+    movq [RESIDUE+0x28],mm4
+    movq [RESIDUE+0x30],mm3
+    movq [RESIDUE+0x38],mm5
+    /*Load rows 6 and 7, then compute [src+4*ystride]-128 and [src+5*ystride]-128*/
+    movq mm2,[SRC+YSTRIDE*2]
+    movq mm3,[SRC+YSTRIDE3]
+    movq mm4,mm0
+    punpcklbw mm0,mm7
+    movq mm5,mm1
+    punpckhbw mm4,mm7
+    psubw mm0,mm6
+    punpcklbw mm1,mm7
+    psubw mm4,mm6
+    punpckhbw mm5,mm7
+    psubw mm1,mm6
+    psubw mm5,mm6
+    /*Write the answer out.*/
+    movq [RESIDUE+0x40],mm0
+    movq [RESIDUE+0x48],mm4
+    movq [RESIDUE+0x50],mm1
+    movq [RESIDUE+0x58],mm5
+    /*Compute [src+6*ystride]-128 and [src+7*ystride]-128*/
+    movq mm4,mm2
+    punpcklbw mm2,mm7
+    movq mm5,mm3
+    punpckhbw mm4,mm7
+    psubw mm2,mm6
+    punpcklbw mm3,mm7
+    psubw mm4,mm6
+    punpckhbw mm5,mm7
+    psubw mm3,mm6
+    psubw mm5,mm6
+    /*Write the answer out.*/
+    movq [RESIDUE+0x60],mm2
+    movq [RESIDUE+0x68],mm4
+    movq [RESIDUE+0x70],mm3
+    movq [RESIDUE+0x78],mm5
+#undef YSTRIDE
+#undef YSTRIDE3
+#undef RESIDUE
+#undef SRC
+  }
+}
+/*Averages _src1 and _src2 into _dst; all three use the row stride _ystride.*/
+void oc_enc_frag_copy2_mmxext(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride){
+  oc_int_frag_copy2_mmxext(_dst,_ystride,_src1,_src2,_ystride);
+}
+
+#endif

Copied: trunk/theora/lib/x86_vc/mmxfdct.c (from rev 16442, trunk/theora/lib/enc/x86_vc/mmxfdct.c)
===================================================================
--- trunk/theora/lib/x86_vc/mmxfdct.c	                        (rev 0)
+++ trunk/theora/lib/x86_vc/mmxfdct.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,670 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************/ 
+ /*MMX fDCT implementation for x86_32*/
+/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/
+#include "x86enc.h"
+
+#if defined(OC_X86_ASM)
+
+#define OC_FDCT_STAGE1_8x4  __asm{ \
+  /*Stage 1: butterflies of t0...t7 (mm0...mm7), done entirely in registers.*/ \
+  /*mm0=t7'=t0-t7*/ \
+  __asm  psubw mm0,mm7 \
+  __asm  paddw mm7,mm7 \
+  /*mm1=t6'=t1-t6*/ \
+  __asm  psubw mm1, mm6 \
+  __asm  paddw mm6,mm6 \
+  /*mm2=t5'=t2-t5*/ \
+  __asm  psubw mm2,mm5 \
+  __asm  paddw mm5,mm5 \
+  /*mm3=t4'=t3-t4*/ \
+  __asm  psubw mm3,mm4 \
+  __asm  paddw mm4,mm4 \
+  /*mm7=t0'=t0+t7*/ \
+  __asm  paddw mm7,mm0 \
+  /*mm6=t1'=t1+t6*/  \
+  __asm  paddw mm6,mm1 \
+  /*mm5=t2'=t2+t5*/ \
+  __asm  paddw mm5,mm2 \
+  /*mm4=t3'=t3+t4*/ \
+  __asm  paddw mm4,mm3\
+}
+
+#define OC_FDCT8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) __asm{ \
+  /*Stage 2 (expects the stage 1 results t0'...t7' in mm7...mm0):*/ \
+  /*mm7=t3''=t0'-t3'*/ \
+  __asm  psubw mm7,mm4 \
+  __asm  paddw mm4,mm4 \
+  /*mm6=t2''=t1'-t2'*/ \
+  __asm  psubw mm6,mm5 \
+  __asm  movq [Y+_r6],mm7 \
+  __asm  paddw mm5,mm5 \
+  /*mm1=t5''=t6'-t5'*/ \
+  __asm  psubw mm1,mm2 \
+  __asm  movq [Y+_r2],mm6 \
+  /*mm4=t0''=t0'+t3'*/ \
+  __asm  paddw mm4,mm7 \
+  __asm  paddw mm2,mm2 \
+  /*mm5=t1''=t1'+t2'*/ \
+  __asm  movq [Y+_r0],mm4 \
+  __asm  paddw mm5,mm6 \
+  /*mm2=t6''=t6'+t5'*/ \
+  __asm  paddw mm2,mm1 \
+  __asm  movq [Y+_r4],mm5 \
+  /*mm0=t7', mm1=t5'', mm2=t6'', mm3=t4'.*/ \
+  /*mm4, mm5, mm6, mm7 are free.*/ \
+  /*Stage 3:*/ \
+  /*mm6={2}x4, mm7={27146,0xB500>>1}x2*/ \
+  __asm  mov A,0x5A806A0A \
+  __asm  pcmpeqb mm6,mm6 \
+  __asm  movd mm7,A \
+  __asm  psrlw mm6,15 \
+  __asm  punpckldq mm7,mm7 \
+  __asm  paddw mm6,mm6 \
+  /*mm0=0, mm2={-1}x4 \
+    mm5:mm4=t5''*27146+0xB500*/ \
+  __asm  movq mm4,mm1 \
+  __asm  movq mm5,mm1 \
+  __asm  punpcklwd mm4,mm6 \
+  __asm  movq [Y+_r3],mm2 \
+  __asm  pmaddwd mm4,mm7 \
+  __asm  movq [Y+_r7],mm0 \
+  __asm  punpckhwd mm5,mm6 \
+  __asm  pxor mm0,mm0 \
+  __asm  pmaddwd mm5,mm7 \
+  __asm  pcmpeqb mm2,mm2 \
+  /*mm2=t6'', mm1=t5''+(t5''!=0) \
+    mm4=(t5''*27146+0xB500>>16)*/ \
+  __asm  pcmpeqw mm0,mm1 \
+  __asm  psrad mm4,16 \
+  __asm  psubw mm0,mm2 \
+  __asm  movq mm2, [Y+_r3] \
+  __asm  psrad mm5,16 \
+  __asm  paddw mm1,mm0 \
+  __asm  packssdw mm4,mm5 \
+  /*mm4=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \
+  __asm  paddw mm4,mm1 \
+  __asm  movq mm0, [Y+_r7] \
+  __asm  psraw mm4,1 \
+  __asm  movq mm1,mm3 \
+  /*mm3=t4''=t4'+s*/ \
+  __asm  paddw mm3,mm4 \
+  /*mm1=t5'''=t4'-s*/ \
+  __asm  psubw mm1,mm4 \
+  /*mm1=0, mm3={-1}x4 \
+    mm5:mm4=t6''*27146+0xB500*/ \
+  __asm  movq mm4,mm2 \
+  __asm  movq mm5,mm2 \
+  __asm  punpcklwd mm4,mm6 \
+  __asm  movq [Y+_r5],mm1 \
+  __asm  pmaddwd mm4,mm7 \
+  __asm  movq [Y+_r1],mm3 \
+  __asm  punpckhwd mm5,mm6 \
+  __asm  pxor mm1,mm1 \
+  __asm  pmaddwd mm5,mm7 \
+  __asm  pcmpeqb mm3,mm3 \
+  /*mm2=t6''+(t6''!=0), mm4=(t6''*27146+0xB500>>16)*/ \
+  __asm  psrad mm4,16 \
+  __asm  pcmpeqw mm1,mm2 \
+  __asm  psrad mm5,16 \
+  __asm  psubw mm1,mm3 \
+  __asm  packssdw mm4,mm5 \
+  __asm  paddw mm2,mm1 \
+  /*mm1=t1'' \
+    mm4=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \
+  __asm  paddw mm4,mm2 \
+  __asm  movq mm1,[Y+_r4] \
+  __asm  psraw mm4,1 \
+  __asm  movq mm2,mm0 \
+  /*mm7 is still {27146,0xB500>>1}x2 (its next reload is in stage 4) \
+    mm0=t7''=t7'+s*/ \
+  __asm  paddw mm0,mm4 \
+  /*mm2=t6'''=t7'-s*/ \
+  __asm  psubw mm2,mm4 \
+  /*Stage 4:*/ \
+  /*mm0=0, mm2=t0'' \
+    mm5:mm4=t1''*27146+0xB500*/ \
+  __asm  movq mm4,mm1 \
+  __asm  movq mm5,mm1 \
+  __asm  punpcklwd mm4,mm6 \
+  __asm  movq [Y+_r3],mm2 \
+  __asm  pmaddwd mm4,mm7 \
+  __asm  movq mm2,[Y+_r0] \
+  __asm  punpckhwd mm5,mm6 \
+  __asm  movq [Y+_r7],mm0 \
+  __asm  pmaddwd mm5,mm7 \
+  __asm  pxor mm0,mm0 \
+  /*mm7={27146,0x4000>>1}x2 \
+    mm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \
+  __asm  psrad mm4,16 \
+  __asm  mov A,0x20006A0A \
+  __asm  pcmpeqw mm0,mm1 \
+  __asm  movd mm7,A \
+  __asm  psrad mm5,16 \
+  __asm  psubw mm0,mm3 \
+  __asm  packssdw mm4,mm5 \
+  __asm  paddw mm0,mm1 \
+  __asm  punpckldq mm7,mm7 \
+  __asm  paddw mm0,mm4 \
+  /*mm6={0x00000E3D}x2 \
+    mm1=-(t0''==0), mm5:mm4=t0''*27146+0x4000*/ \
+  __asm  movq mm4,mm2 \
+  __asm  movq mm5,mm2 \
+  __asm  punpcklwd mm4,mm6 \
+  __asm  mov A,0x0E3D \
+  __asm  pmaddwd mm4,mm7 \
+  __asm  punpckhwd mm5,mm6 \
+  __asm  movd mm6,A \
+  __asm  pmaddwd mm5,mm7 \
+  __asm  pxor mm1,mm1 \
+  __asm  punpckldq mm6,mm6 \
+  __asm  pcmpeqw mm1,mm2 \
+  /*mm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \
+  __asm  psrad mm4,16 \
+  __asm  psubw mm1,mm3 \
+  __asm  psrad mm5,16 \
+  __asm  paddw mm2,mm1 \
+  __asm  packssdw mm4,mm5 \
+  __asm  movq mm1,[Y+_r5] \
+  __asm  paddw mm4,mm2 \
+  /*mm2=t6''', mm0=_y[0]=u=r+s>>1 \
+    The naive implementation could cause overflow, so we use \
+     u=(r&s)+((r^s)>>1).*/ \
+  __asm  movq mm2,[Y+_r3] \
+  __asm  movq mm7,mm0 \
+  __asm  pxor mm0,mm4 \
+  __asm  pand mm7,mm4 \
+  __asm  psraw mm0,1 \
+  __asm  mov A,0x7FFF54DC \
+  __asm  paddw mm0,mm7 \
+  __asm  movd mm7,A \
+  /*mm7={54491-0x7FFF,0x7FFF}x2 \
+    mm4=_y[4]=v=r-u*/ \
+  __asm  psubw mm4,mm0 \
+  __asm  punpckldq mm7,mm7 \
+  __asm  movq [Y+_r4],mm4 \
+  /*mm0=0, mm7={36410}x4 \
+    mm1=(t5'''!=0), mm5:mm4=54491*t5'''+0x0E3D*/ \
+  __asm  movq mm4,mm1 \
+  __asm  movq mm5,mm1 \
+  __asm  punpcklwd mm4,mm1 \
+  __asm  mov A,0x8E3A8E3A \
+  __asm  pmaddwd mm4,mm7 \
+  __asm  movq [Y+_r0],mm0 \
+  __asm  punpckhwd mm5,mm1 \
+  __asm  pxor mm0,mm0 \
+  __asm  pmaddwd mm5,mm7 \
+  __asm  pcmpeqw mm1,mm0 \
+  __asm  movd mm7,A \
+  __asm  psubw mm1,mm3 \
+  __asm  punpckldq mm7,mm7 \
+  __asm  paddd mm4,mm6 \
+  __asm  paddd mm5,mm6 \
+  /*mm0=0 \
+    mm3:mm1=36410*t6'''+((t5'''!=0)<<16)*/ \
+  __asm  movq mm6,mm2 \
+  __asm  movq mm3,mm2 \
+  __asm  pmulhw mm6,mm7 \
+  __asm  paddw mm1,mm2 \
+  __asm  pmullw mm3,mm7 \
+  __asm  pxor mm0,mm0 \
+  __asm  paddw mm6,mm1 \
+  __asm  movq mm1,mm3 \
+  __asm  punpckhwd mm3,mm6 \
+  __asm  punpcklwd mm1,mm6 \
+  /*mm3={-1}x4, mm6={1}x4 \
+    mm4=_y[5]=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \
+  __asm  paddd mm5,mm3 \
+  __asm  paddd mm4,mm1 \
+  __asm  psrad mm5,16 \
+  __asm  pxor mm6,mm6 \
+  __asm  psrad mm4,16 \
+  __asm  pcmpeqb mm3,mm3 \
+  __asm  packssdw mm4,mm5 \
+  __asm  psubw mm6,mm3 \
+  /*mm1=t7'', mm7={26568,0x3400}x2 \
+    mm2=s=t6'''-(36410*u>>16)*/ \
+  __asm  movq mm1,mm4 \
+  __asm  mov A,0x340067C8 \
+  __asm  pmulhw mm4,mm7 \
+  __asm  movd mm7,A \
+  __asm  movq [Y+_r5],mm1 \
+  __asm  punpckldq mm7,mm7 \
+  __asm  paddw mm4,mm1 \
+  __asm  movq mm1,[Y+_r7] \
+  __asm  psubw mm2,mm4 \
+  /*mm6={0x00007B1B}x2 \
+    mm0=(s!=0), mm5:mm4=s*26568+0x3400*/ \
+  __asm  movq mm4,mm2 \
+  __asm  movq mm5,mm2 \
+  __asm  punpcklwd mm4,mm6 \
+  __asm  pcmpeqw mm0,mm2 \
+  __asm  pmaddwd mm4,mm7 \
+  __asm  mov A,0x7B1B \
+  __asm  punpckhwd mm5,mm6 \
+  __asm  movd mm6,A \
+  __asm  pmaddwd mm5,mm7 \
+  __asm  psubw mm0,mm3 \
+  __asm  punpckldq mm6,mm6 \
+  /*mm7={64277-0x7FFF,0x7FFF}x2 \
+    mm2=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \
+  __asm  psrad mm4,17 \
+  __asm  paddw mm2,mm0 \
+  __asm  psrad mm5,17 \
+  __asm  mov A,0x7FFF7B16 \
+  __asm  packssdw mm4,mm5 \
+  __asm  movd mm7,A \
+  __asm  paddw mm2,mm4 \
+  __asm  punpckldq mm7,mm7 \
+  /*mm0=0, mm7={12785}x4 \
+    mm1=(t7''!=0), mm2=t4'', mm5:mm4=64277*t7''+0x7B1B*/ \
+  __asm  movq mm4,mm1 \
+  __asm  movq mm5,mm1 \
+  __asm  movq [Y+_r3],mm2 \
+  __asm  punpcklwd mm4,mm1 \
+  __asm  movq mm2,[Y+_r1] \
+  __asm  pmaddwd mm4,mm7 \
+  __asm  mov A,0x31F131F1 \
+  __asm  punpckhwd mm5,mm1 \
+  __asm  pxor mm0,mm0 \
+  __asm  pmaddwd mm5,mm7 \
+  __asm  pcmpeqw mm1,mm0 \
+  __asm  movd mm7,A \
+  __asm  psubw mm1,mm3 \
+  __asm  punpckldq mm7,mm7 \
+  __asm  paddd mm4,mm6 \
+  __asm  paddd mm5,mm6 \
+  /*mm3:mm1=12785*t4''+((t7''!=0)<<16)*/ \
+  __asm  movq mm6,mm2 \
+  __asm  movq mm3,mm2 \
+  __asm  pmulhw mm6,mm7 \
+  __asm  pmullw mm3,mm7 \
+  __asm  paddw mm6,mm1 \
+  __asm  movq mm1,mm3 \
+  __asm  punpckhwd mm3,mm6 \
+  __asm  punpcklwd mm1,mm6 \
+  /*mm3={-1}x4, mm6={1}x4 \
+    mm4=_y[1]=u=(12785*t4''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \
+  __asm  paddd mm5,mm3 \
+  __asm  paddd mm4,mm1 \
+  __asm  psrad mm5,16 \
+  __asm  pxor mm6,mm6 \
+  __asm  psrad mm4,16 \
+  __asm  pcmpeqb mm3,mm3 \
+  __asm  packssdw mm4,mm5 \
+  __asm  psubw mm6,mm3 \
+  /*mm1=t3'', mm7={20539,0x3000}x2 \
+    mm4=s=(12785*u>>16)-t4''*/ \
+  __asm  movq [Y+_r1],mm4 \
+  __asm  pmulhw mm4,mm7 \
+  __asm  mov A,0x3000503B \
+  __asm  movq mm1,[Y+_r6] \
+  __asm  movd mm7,A \
+  __asm  psubw mm4,mm2 \
+  __asm  punpckldq mm7,mm7 \
+  /*mm6={0x00006CB7}x2 \
+    mm0=(s!=0), mm5:mm4=s*20539+0x3000*/ \
+  __asm  movq mm5,mm4 \
+  __asm  movq mm2,mm4 \
+  __asm  punpcklwd mm4,mm6 \
+  __asm  pcmpeqw mm0,mm2 \
+  __asm  pmaddwd mm4,mm7 \
+  __asm  mov A,0x6CB7 \
+  __asm  punpckhwd mm5,mm6 \
+  __asm  movd mm6,A \
+  __asm  pmaddwd mm5,mm7 \
+  __asm  psubw mm0,mm3 \
+  __asm  punpckldq mm6,mm6 \
+  /*mm7={60547-0x7FFF,0x7FFF}x2 \
+    mm2=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \
+  __asm  psrad mm4,20 \
+  __asm  paddw mm2,mm0 \
+  __asm  psrad mm5,20 \
+  __asm  mov A,0x7FFF6C84 \
+  __asm  packssdw mm4,mm5 \
+  __asm  movd mm7,A \
+  __asm  paddw mm2,mm4 \
+  __asm  punpckldq mm7,mm7 \
+  /*mm0=0, mm7={25080}x4 \
+    mm2=t2'', mm5:mm4=60547*t3''+0x6CB7*/ \
+  __asm  movq mm4,mm1 \
+  __asm  movq mm5,mm1 \
+  __asm  movq [Y+_r7],mm2 \
+  __asm  punpcklwd mm4,mm1 \
+  __asm  movq mm2,[Y+_r2] \
+  __asm  pmaddwd mm4,mm7 \
+  __asm  mov A,0x61F861F8 \
+  __asm  punpckhwd mm5,mm1 \
+  __asm  pxor mm0,mm0 \
+  __asm  pmaddwd mm5,mm7 \
+  __asm  movd mm7,A \
+  __asm  pcmpeqw mm1,mm0 \
+  __asm  psubw mm1,mm3 \
+  __asm  punpckldq mm7,mm7 \
+  __asm  paddd mm4,mm6 \
+  __asm  paddd mm5,mm6 \
+  /*mm3:mm1=25080*t2''+((t3''!=0)<<16)*/ \
+  __asm  movq mm6,mm2 \
+  __asm  movq mm3,mm2 \
+  __asm  pmulhw mm6,mm7 \
+  __asm  pmullw mm3,mm7 \
+  __asm  paddw mm6,mm1 \
+  __asm  movq mm1,mm3 \
+  __asm  punpckhwd mm3,mm6 \
+  __asm  punpcklwd mm1,mm6 \
+  /*mm1={-1}x4 \
+    mm4=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \
+  __asm  paddd mm5,mm3 \
+  __asm  paddd mm4,mm1 \
+  __asm  psrad mm5,16 \
+  __asm  mov A,0x28005460 \
+  __asm  psrad mm4,16 \
+  __asm  pcmpeqb mm1,mm1 \
+  __asm  packssdw mm4,mm5 \
+  /*mm5={1}x4, mm6=_y[2]=u, mm7={21600,0x2800}x2 \
+    mm4=s=(25080*u>>16)-t2''*/ \
+  __asm  movq mm6,mm4 \
+  __asm  pmulhw mm4,mm7 \
+  __asm  pxor mm5,mm5 \
+  __asm  movd mm7,A \
+  __asm  psubw mm5,mm1 \
+  __asm  punpckldq mm7,mm7 \
+  __asm  psubw mm4,mm2 \
+  /*mm2=s+(s!=0) \
+    mm4:mm3=s*21600+0x2800*/ \
+  __asm  movq mm3,mm4 \
+  __asm  movq mm2,mm4 \
+  __asm  punpckhwd mm4,mm5 \
+  __asm  pcmpeqw mm0,mm2 \
+  __asm  pmaddwd mm4,mm7 \
+  __asm  psubw mm0,mm1 \
+  __asm  punpcklwd mm3,mm5 \
+  __asm  paddw mm2,mm0 \
+  __asm  pmaddwd mm3,mm7 \
+  /*mm0=_y[4], mm1=_y[7], mm4=_y[0], mm5=_y[5] \
+    mm3=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \
+  __asm  movq mm0,[Y+_r4] \
+  __asm  psrad mm4,18 \
+  __asm  movq mm5,[Y+_r5] \
+  __asm  psrad mm3,18 \
+  __asm  movq mm1,[Y+_r7] \
+  __asm  packssdw mm3,mm4 \
+  __asm  movq mm4,[Y+_r0] \
+  __asm  paddw mm3,mm2 \
+}
+
+/*On input, mm4=_y[0], mm6=_y[2], mm0=_y[4], mm5=_y[5], mm3=_y[6], mm1=_y[7].
+  On output, {_y[4],mm1,mm2,mm3} contains the transpose of _y[4...7] and
+   {mm4,mm5,mm6,mm7} contains the transpose of _y[0...3].*/
+#define OC_TRANSPOSE8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) __asm{ \
+  /*First 4x4 transpose:*/ \
+  /*mm0 = e3 e2 e1 e0 \
+    mm5 = f3 f2 f1 f0 \
+    mm3 = g3 g2 g1 g0 \
+    mm1 = h3 h2 h1 h0*/ \
+  __asm  movq mm2,mm0 \
+  __asm  punpcklwd mm0,mm5 \
+  __asm  punpckhwd mm2,mm5 \
+  __asm  movq mm5,mm3 \
+  __asm  punpcklwd mm3,mm1 \
+  __asm  punpckhwd mm5,mm1 \
+  /*mm0 = f1 e1 f0 e0 \
+    mm2 = f3 e3 f2 e2 \
+    mm3 = h1 g1 h0 g0 \
+    mm5 = h3 g3 h2 g2*/ \
+  __asm  movq mm1,mm0 \
+  __asm  punpckldq mm0,mm3 \
+  __asm  movq [Y+_r4],mm0 \
+  __asm  punpckhdq mm1,mm3 \
+  __asm  movq mm0,[Y+_r1] \
+  __asm  movq mm3,mm2 \
+  __asm  punpckldq mm2,mm5 \
+  __asm  punpckhdq mm3,mm5 \
+  __asm  movq mm5,[Y+_r3] \
+  /*_y[4] = h0 g0 f0 e0 \
+   mm1  = h1 g1 f1 e1 \
+   mm2  = h2 g2 f2 e2 \
+   mm3  = h3 g3 f3 e3*/ \
+  /*Second 4x4 transpose (rows b and d were just loaded from [Y+_r1] and [Y+_r3]):*/ \
+  /*mm4 = a3 a2 a1 a0 \
+    mm0 = b3 b2 b1 b0 \
+    mm6 = c3 c2 c1 c0 \
+    mm5 = d3 d2 d1 d0*/ \
+  __asm  movq mm7,mm4 \
+  __asm  punpcklwd mm4,mm0 \
+  __asm  punpckhwd mm7,mm0 \
+  __asm  movq mm0,mm6 \
+  __asm  punpcklwd mm6,mm5 \
+  __asm  punpckhwd mm0,mm5 \
+  /*mm4 = b1 a1 b0 a0 \
+    mm7 = b3 a3 b2 a2 \
+    mm6 = d1 c1 d0 c0 \
+    mm0 = d3 c3 d2 c2*/ \
+  __asm  movq mm5,mm4 \
+  __asm  punpckldq mm4,mm6 \
+  __asm  punpckhdq mm5,mm6 \
+  __asm  movq mm6,mm7 \
+  __asm  punpckhdq mm7,mm0 \
+  __asm  punpckldq mm6,mm0 \
+  /*mm4 = d0 c0 b0 a0 \
+    mm5 = d1 c1 b1 a1 \
+    mm6 = d2 c2 b2 a2 \
+    mm7 = d3 c3 b3 a3*/ \
+}
+
+/*MMX implementation of the fDCT: reads the block _x and stores the result in _y.*/
+void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  ptrdiff_t a; /*NOTE(review): never referenced below; A is #defined to ecx.*/
+  __asm{
+#define Y eax
+#define A ecx
+#define X edx
+    /*Add two extra bits of working precision to improve accuracy; any more and
+       we could overflow.*/
+    /*We also add biases to correct for some systematic error that remains in
+       the full fDCT->iDCT round trip.*/
+    mov X, _x
+    mov Y, _y
+    movq mm0,[0x00+X]
+    movq mm1,[0x10+X]
+    movq mm2,[0x20+X]
+    movq mm3,[0x30+X]
+    pcmpeqb mm4,mm4
+    pxor mm7,mm7
+    movq mm5,mm0
+    psllw mm0,2
+    pcmpeqw mm5,mm7
+    movq mm7,[0x70+X]
+    psllw mm1,2
+    psubw mm5,mm4
+    psllw mm2,2
+    mov A,1
+    pslld mm5,16
+    movd mm6,A
+    psllq mm5,16
+    mov A,0x10001
+    psllw mm3,2
+    movd mm4,A
+    punpckhwd mm5,mm6
+    psubw mm1,mm6
+    movq mm6,[0x60+X]
+    paddw mm0,mm5
+    movq mm5,[0x50+X]
+    paddw mm0,mm4
+    movq mm4,[0x40+X]
+    /*We inline stage1 of the transform here so we can get better instruction
+       scheduling with the shifts.*/
+    /*mm0=t7'=t0-t7*/
+    psllw mm7,2
+    psubw mm0,mm7
+    psllw mm6,2
+    paddw mm7,mm7
+    /*mm1=t6'=t1-t6*/
+    psllw mm5,2
+    psubw mm1,mm6
+    psllw mm4,2
+    paddw mm6,mm6
+    /*mm2=t5'=t2-t5*/
+    psubw mm2,mm5
+    paddw mm5,mm5
+    /*mm3=t4'=t3-t4*/
+    psubw mm3,mm4
+    paddw mm4,mm4
+    /*mm7=t0'=t0+t7*/
+    paddw mm7,mm0
+    /*mm6=t1'=t1+t6*/
+    paddw mm6,mm1
+    /*mm5=t2'=t2+t5*/
+    paddw mm5,mm2
+    /*mm4=t3'=t3+t4*/
+    paddw mm4,mm3
+    OC_FDCT8x4(0x00,0x10,0x20,0x30,0x40,0x50,0x60,0x70)
+    OC_TRANSPOSE8x4(0x00,0x10,0x20,0x30,0x40,0x50,0x60,0x70)
+    /*Swap out this 8x4 block for the next one.*/
+    movq mm0,[0x08+X]
+    movq [0x30+Y],mm7
+    movq mm7,[0x78+X]
+    movq [0x50+Y],mm1
+    movq mm1,[0x18+X]
+    movq [0x20+Y],mm6
+    movq mm6,[0x68+X]
+    movq [0x60+Y],mm2
+    movq mm2,[0x28+X]
+    movq [0x10+Y],mm5
+    movq mm5,[0x58+X]
+    movq [0x70+Y],mm3
+    movq mm3,[0x38+X]
+    /*And increase its working precision, too.*/
+    psllw mm0,2
+    movq [0x00+Y],mm4
+    psllw mm7,2
+    movq mm4,[0x48+X]
+    /*We inline stage1 of the transform here so we can get better instruction
+       scheduling with the shifts.*/
+    /*mm0=t7'=t0-t7*/
+    psubw mm0,mm7
+    psllw mm1,2
+    paddw mm7,mm7
+    psllw mm6,2
+    /*mm1=t6'=t1-t6*/
+    psubw mm1,mm6
+    psllw mm2,2
+    paddw mm6,mm6
+    psllw mm5,2
+    /*mm2=t5'=t2-t5*/
+    psubw mm2,mm5
+    psllw mm3,2
+    paddw mm5,mm5
+    psllw mm4,2
+    /*mm3=t4'=t3-t4*/
+    psubw mm3,mm4
+    paddw mm4,mm4
+    /*mm7=t0'=t0+t7*/
+    paddw mm7,mm0
+    /*mm6=t1'=t1+t6*/
+    paddw mm6,mm1
+    /*mm5=t2'=t2+t5*/
+    paddw mm5,mm2
+    /*mm4=t3'=t3+t4*/
+    paddw mm4,mm3
+    OC_FDCT8x4(0x08,0x18,0x28,0x38,0x48,0x58,0x68,0x78)
+    OC_TRANSPOSE8x4(0x08,0x18,0x28,0x38,0x48,0x58,0x68,0x78)
+    /*Here the first 4x4 block of output from the last transpose is the second
+       4x4 block of input for the next transform.
+      We have cleverly arranged that it already be in the appropriate place,
+       so we only have to do half the stores and loads.*/
+    movq mm0,[0x00+Y]
+    movq [0x58+Y],mm1
+    movq mm1,[0x10+Y]
+    movq [0x68+Y],mm2
+    movq mm2,[0x20+Y]
+    movq [0x78+Y],mm3
+    movq mm3,[0x30+Y]
+    OC_FDCT_STAGE1_8x4
+    OC_FDCT8x4(0x00,0x10,0x20,0x30,0x08,0x18,0x28,0x38)
+    OC_TRANSPOSE8x4(0x00,0x10,0x20,0x30,0x08,0x18,0x28,0x38)
+    /*mm0={-2}x4*/
+    pcmpeqw mm0,mm0
+    paddw mm0,mm0
+    /*Round the results: x-(-2)>>2 drops the two extra bits of precision.*/
+    psubw mm1,mm0
+    psubw mm2,mm0
+    psraw mm1,2
+    psubw mm3,mm0
+    movq [0x18+Y],mm1
+    psraw mm2,2
+    psubw mm4,mm0
+    movq mm1,[0x08+Y]
+    psraw mm3,2
+    psubw mm5,mm0
+    psraw mm4,2
+    psubw mm6,mm0
+    psraw mm5,2
+    psubw mm7,mm0
+    psraw mm6,2
+    psubw mm1,mm0
+    psraw mm7,2
+    movq mm0,[0x40+Y]
+    psraw mm1,2
+    movq [0x30+Y],mm7
+    movq mm7,[0x78+Y]
+    movq [0x08+Y],mm1
+    movq mm1,[0x50+Y]
+    movq [0x20+Y],mm6
+    movq mm6,[0x68+Y]
+    movq [0x28+Y],mm2
+    movq mm2,[0x60+Y]
+    movq [0x10+Y],mm5
+    movq mm5,[0x58+Y]
+    movq [0x38+Y],mm3
+    movq mm3,[0x70+Y]
+    movq [0x00+Y],mm4
+    movq mm4,[0x48+Y]
+    OC_FDCT_STAGE1_8x4
+    OC_FDCT8x4(0x40,0x50,0x60,0x70,0x48,0x58,0x68,0x78)
+    OC_TRANSPOSE8x4(0x40,0x50,0x60,0x70,0x48,0x58,0x68,0x78)
+    /*mm0={-2}x4*/
+    pcmpeqw mm0,mm0
+    paddw mm0,mm0
+    /*Round the results: x-(-2)>>2 drops the two extra bits of precision.*/
+    psubw mm1,mm0
+    psubw mm2,mm0
+    psraw mm1,2
+    psubw mm3,mm0
+    movq [0x58+Y],mm1
+    psraw mm2,2
+    psubw mm4,mm0
+    movq mm1,[0x48+Y]
+    psraw mm3,2
+    psubw mm5,mm0
+    movq [0x68+Y],mm2
+    psraw mm4,2
+    psubw mm6,mm0
+    movq [0x78+Y],mm3
+    psraw mm5,2
+    psubw mm7,mm0
+    movq [0x40+Y],mm4
+    psraw mm6,2
+    psubw mm1,mm0
+    movq [0x50+Y],mm5
+    psraw mm7,2
+    movq [0x60+Y],mm6
+    psraw mm1,2
+    movq [0x70+Y],mm7
+    movq [0x48+Y],mm1
+#undef Y
+#undef A
+#undef X
+  }
+}
+
+#endif

Copied: trunk/theora/lib/x86_vc/x86enc.c (from rev 16442, trunk/theora/lib/enc/x86_vc/x86enc.c)
===================================================================
--- trunk/theora/lib/x86_vc/x86enc.c	                        (rev 0)
+++ trunk/theora/lib/x86_vc/x86enc.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,49 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86state.c 15675 2009-02-06 09:43:27Z tterribe $
+
+ ********************************************************************/
+#include "x86enc.h"
+
+#if defined(OC_X86_ASM)
+
+#include "../cpu.c"
+
+void oc_enc_vtable_init_x86(oc_enc_ctx *_enc){ /*Fills _enc->opt_vtable from the CPU flags.*/
+  ogg_uint32_t cpu_flags;
+  cpu_flags=oc_cpu_flags_get();
+  oc_enc_vtable_init_c(_enc); /*Called first; presumably sets C defaults that the entries below replace.*/
+  if(cpu_flags&OC_CPU_X86_MMX){ /*Entries installed when the MMX flag is set.*/
+    _enc->opt_vtable.frag_sub=oc_enc_frag_sub_mmx;
+    _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx;
+    _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
+    _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx;
+    _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_mmx;
+  }
+  if(cpu_flags&OC_CPU_X86_MMXEXT){ /*Entries installed when the MMXEXT flag is set.*/
+    _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext;
+    _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext;
+    _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext;
+    _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext;
+    _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext;
+    _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext;
+    _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext;
+  }
+  if(cpu_flags&OC_CPU_X86_SSE2){ /*Tested after MMX, so its fdct8x8 wins; empty unless OC_X86_64_ASM is defined.*/
+# if defined(OC_X86_64_ASM)
+    _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2;
+# endif
+  }
+}
+#endif

Copied: trunk/theora/lib/x86_vc/x86enc.h (from rev 16442, trunk/theora/lib/enc/x86_vc/x86enc.h)
===================================================================
--- trunk/theora/lib/x86_vc/x86enc.h	                        (rev 0)
+++ trunk/theora/lib/x86_vc/x86enc.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -0,0 +1,47 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86int.h 15675 2009-02-06 09:43:27Z tterribe $
+
+ ********************************************************************/
+
+#if !defined(_x86_vc_x86enc_H)
+# define _x86_vc_x86enc_H (1)
+# include "../encint.h"
+# include "x86int.h"
+
+void oc_enc_vtable_init_x86(oc_enc_ctx *_enc);
+
+unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride);
+void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64],
+ const unsigned char *_x,const unsigned char *_y,int _stride);
+void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64],
+ const unsigned char *_x,int _stride);
+void oc_enc_frag_copy2_mmxext(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+
+#endif

Modified: trunk/theora/lib/x86_vc/x86int.h
===================================================================
--- trunk/theora/lib/dec/x86_vc/x86int.h	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/lib/x86_vc/x86int.h	2009-08-06 01:43:12 UTC (rev 16443)
@@ -17,7 +17,7 @@
 
 #if !defined(_x86_vc_x86int_H)
 # define _x86_vc_x86int_H (1)
-# include "../../internal.h"
+# include "../internal.h"
 
 void oc_state_vtable_init_x86(oc_theora_state *_state);
 

Modified: trunk/theora/lib/x86_vc/x86state.c
===================================================================
--- trunk/theora/lib/dec/x86_vc/x86state.c	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/lib/x86_vc/x86state.c	2009-08-06 01:43:12 UTC (rev 16443)
@@ -19,7 +19,7 @@
 
 #if defined(OC_X86_ASM)
 
-#include "../../cpu.c"
+#include "../cpu.c"
 
 /*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into
    each quadrant of the destination.*/

Modified: trunk/theora/win32/VS2005/libtheora/libtheora_dynamic.vcproj
===================================================================
--- trunk/theora/win32/VS2005/libtheora/libtheora_dynamic.vcproj	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/win32/VS2005/libtheora/libtheora_dynamic.vcproj	2009-08-06 01:43:12 UTC (rev 16443)
@@ -53,7 +53,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;OC_X86_ASM"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -130,7 +130,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -208,7 +208,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -284,7 +284,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -360,7 +360,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -438,7 +438,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;OC_X86_ASM"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -522,7 +522,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -607,7 +607,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -691,7 +691,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -775,7 +775,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -857,7 +857,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -942,7 +942,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1027,7 +1027,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1111,7 +1111,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1195,7 +1195,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1277,7 +1277,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1362,7 +1362,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1447,7 +1447,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1531,7 +1531,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1615,7 +1615,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1714,27 +1714,27 @@
 				Name="enc"
 				>
 				<File
-					RelativePath="..\..\..\lib\enc\analyze.c"
+					RelativePath="..\..\..\lib\analyze.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encapiwrapper.c"
+					RelativePath="..\..\..\lib\encapiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encfrag.c"
+					RelativePath="..\..\..\lib\encfrag.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encinfo.c"
+					RelativePath="..\..\..\lib\encinfo.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encint.h"
+					RelativePath="..\..\..\lib\encint.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encode.c"
+					RelativePath="..\..\..\lib\encode.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -1942,7 +1942,7 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encoder_disabled.c"
+					RelativePath="..\..\..\lib\encoder_disabled.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2106,62 +2106,62 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\enquant.c"
+					RelativePath="..\..\..\lib\enquant.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\enquant.h"
+					RelativePath="..\..\..\lib\enquant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\fdct.c"
+					RelativePath="..\..\..\lib\fdct.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\huffenc.c"
+					RelativePath="..\..\..\lib\huffenc.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\huffenc.h"
+					RelativePath="..\..\..\lib\huffenc.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mathops.c"
+					RelativePath="..\..\..\lib\mathops.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mathops.h"
+					RelativePath="..\..\..\lib\mathops.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mcenc.c"
+					RelativePath="..\..\..\lib\mcenc.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mode_select.h"
+					RelativePath="..\..\..\lib\mode_select.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\rate.c"
+					RelativePath="..\..\..\lib\rate.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\tokenize.c"
+					RelativePath="..\..\..\lib\tokenize.c"
 					>
 				</File>
 				<Filter
 					Name="x86_vc"
 					>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\mmxencfrag.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxencfrag.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\mmxfdct.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxfdct.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\x86enc.c"
+						RelativePath="..\..\..\lib\x86_vc\x86enc.c"
 						>
 					</File>
 				</Filter>
@@ -2170,39 +2170,39 @@
 				Name="dec"
 				>
 				<File
-					RelativePath="..\..\..\lib\dec\apiwrapper.c"
+					RelativePath="..\..\..\lib\apiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\apiwrapper.h"
+					RelativePath="..\..\..\lib\apiwrapper.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\bitpack.c"
+					RelativePath="..\..\..\lib\bitpack.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\bitpack.h"
+					RelativePath="..\..\..\lib\bitpack.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dct.h"
+					RelativePath="..\..\..\lib\dct.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decapiwrapper.c"
+					RelativePath="..\..\..\lib\decapiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decinfo.c"
+					RelativePath="..\..\..\lib\decinfo.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decint.h"
+					RelativePath="..\..\..\lib\decint.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decode.c"
+					RelativePath="..\..\..\lib\decode.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2410,35 +2410,35 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dequant.c"
+					RelativePath="..\..\..\lib\dequant.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dequant.h"
+					RelativePath="..\..\..\lib\dequant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\enquant.h"
+					RelativePath="..\..\..\lib\enquant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\fragment.c"
+					RelativePath="..\..\..\lib\fragment.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffdec.c"
+					RelativePath="..\..\..\lib\huffdec.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffdec.h"
+					RelativePath="..\..\..\lib\huffdec.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffman.h"
+					RelativePath="..\..\..\lib\huffman.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\idct.c"
+					RelativePath="..\..\..\lib\idct.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2646,23 +2646,23 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\idct.h"
+					RelativePath="..\..\..\lib\idct.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\info.c"
+					RelativePath="..\..\..\lib\info.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\internal.c"
+					RelativePath="..\..\..\lib\internal.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\ocintrin.h"
+					RelativePath="..\..\..\lib\ocintrin.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\quant.c"
+					RelativePath="..\..\..\lib\quant.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2870,30 +2870,30 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\quant.h"
+					RelativePath="..\..\..\lib\quant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\state.c"
+					RelativePath="..\..\..\lib\state.c"
 					>
 				</File>
 				<Filter
 					Name="x86_vc"
 					>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxfrag.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxfrag.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxidct.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxidct.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxstate.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxstate.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\x86state.c"
+						RelativePath="..\..\..\lib\x86_vc\x86state.c"
 						>
 					</File>
 				</Filter>

Modified: trunk/theora/win32/VS2005/libtheora/libtheora_static.vcproj
===================================================================
--- trunk/theora/win32/VS2005/libtheora/libtheora_static.vcproj	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/win32/VS2005/libtheora/libtheora_static.vcproj	2009-08-06 01:43:12 UTC (rev 16443)
@@ -53,7 +53,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;OC_X86_ASM"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -118,7 +118,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -184,7 +184,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -257,7 +257,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -330,7 +330,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -405,7 +405,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;OC_X86_ASM"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -477,7 +477,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -550,7 +550,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -631,7 +631,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -712,7 +712,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -791,7 +791,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -864,7 +864,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -937,7 +937,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1018,7 +1018,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1099,7 +1099,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1178,7 +1178,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1251,7 +1251,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1324,7 +1324,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1405,7 +1405,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1486,7 +1486,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1578,27 +1578,27 @@
 				Name="enc"
 				>
 				<File
-					RelativePath="..\..\..\lib\enc\analyze.c"
+					RelativePath="..\..\..\lib\analyze.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encapiwrapper.c"
+					RelativePath="..\..\..\lib\encapiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encfrag.c"
+					RelativePath="..\..\..\lib\encfrag.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encinfo.c"
+					RelativePath="..\..\..\lib\encinfo.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encint.h"
+					RelativePath="..\..\..\lib\encint.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encode.c"
+					RelativePath="..\..\..\lib\encode.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -1806,7 +1806,7 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encoder_disabled.c"
+					RelativePath="..\..\..\lib\encoder_disabled.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -1970,70 +1970,70 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encoder_huffman.h"
+					RelativePath="..\..\..\lib\encoder_huffman.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encoder_lookup.h"
+					RelativePath="..\..\..\lib\encoder_lookup.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\enquant.c"
+					RelativePath="..\..\..\lib\enquant.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\enquant.h"
+					RelativePath="..\..\..\lib\enquant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\fdct.c"
+					RelativePath="..\..\..\lib\fdct.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\huffenc.c"
+					RelativePath="..\..\..\lib\huffenc.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\huffenc.h"
+					RelativePath="..\..\..\lib\huffenc.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mathops.c"
+					RelativePath="..\..\..\lib\mathops.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mathops.h"
+					RelativePath="..\..\..\lib\mathops.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mcenc.c"
+					RelativePath="..\..\..\lib\mcenc.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mode_select.h"
+					RelativePath="..\..\..\lib\mode_select.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\rate.c"
+					RelativePath="..\..\..\lib\rate.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\tokenize.c"
+					RelativePath="..\..\..\lib\tokenize.c"
 					>
 				</File>
 				<Filter
 					Name="x86_vc"
 					>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\mmxencfrag.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxencfrag.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\mmxfdct.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxfdct.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\x86enc.c"
+						RelativePath="..\..\..\lib\x86_vc\x86enc.c"
 						>
 					</File>
 				</Filter>
@@ -2042,39 +2042,39 @@
 				Name="dec"
 				>
 				<File
-					RelativePath="..\..\..\lib\dec\apiwrapper.c"
+					RelativePath="..\..\..\lib\apiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\apiwrapper.h"
+					RelativePath="..\..\..\lib\apiwrapper.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\bitpack.c"
+					RelativePath="..\..\..\lib\bitpack.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\bitpack.h"
+					RelativePath="..\..\..\lib\bitpack.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dct.h"
+					RelativePath="..\..\..\lib\dct.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decapiwrapper.c"
+					RelativePath="..\..\..\lib\decapiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decinfo.c"
+					RelativePath="..\..\..\lib\decinfo.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decint.h"
+					RelativePath="..\..\..\lib\decint.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decode.c"
+					RelativePath="..\..\..\lib\decode.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2282,35 +2282,35 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dequant.c"
+					RelativePath="..\..\..\lib\dequant.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dequant.h"
+					RelativePath="..\..\..\lib\dequant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\enquant.h"
+					RelativePath="..\..\..\lib\enquant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\fragment.c"
+					RelativePath="..\..\..\lib\fragment.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffdec.c"
+					RelativePath="..\..\..\lib\huffdec.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffdec.h"
+					RelativePath="..\..\..\lib\huffdec.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffman.h"
+					RelativePath="..\..\..\lib\huffman.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\idct.c"
+					RelativePath="..\..\..\lib\idct.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2518,23 +2518,23 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\idct.h"
+					RelativePath="..\..\..\lib\idct.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\info.c"
+					RelativePath="..\..\..\lib\info.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\internal.c"
+					RelativePath="..\..\..\lib\internal.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\ocintrin.h"
+					RelativePath="..\..\..\lib\ocintrin.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\quant.c"
+					RelativePath="..\..\..\lib\quant.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2742,18 +2742,18 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\quant.h"
+					RelativePath="..\..\..\lib\quant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\state.c"
+					RelativePath="..\..\..\lib\state.c"
 					>
 				</File>
 				<Filter
 					Name="x86_vc"
 					>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxfrag.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxfrag.c"
 						>
 						<FileConfiguration
 							Name="Debug|x64"
@@ -2765,7 +2765,7 @@
 						</FileConfiguration>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxidct.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxidct.c"
 						>
 						<FileConfiguration
 							Name="Debug|x64"
@@ -2777,7 +2777,7 @@
 						</FileConfiguration>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxstate.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxstate.c"
 						>
 						<FileConfiguration
 							Name="Debug|x64"
@@ -2789,7 +2789,7 @@
 						</FileConfiguration>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\x86state.c"
+						RelativePath="..\..\..\lib\x86_vc\x86state.c"
 						>
 						<FileConfiguration
 							Name="Debug|x64"

Modified: trunk/theora/win32/VS2008/libtheora/libtheora_dynamic.vcproj
===================================================================
--- trunk/theora/win32/VS2008/libtheora/libtheora_dynamic.vcproj	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/win32/VS2008/libtheora/libtheora_dynamic.vcproj	2009-08-06 01:43:12 UTC (rev 16443)
@@ -54,7 +54,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;OC_X86_ASM"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -130,7 +130,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -209,7 +209,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;OC_X86_ASM"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -292,7 +292,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -374,7 +374,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -458,7 +458,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -540,7 +540,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -624,7 +624,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -704,7 +704,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -780,7 +780,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -856,7 +856,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -936,7 +936,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1020,7 +1020,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1104,7 +1104,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1188,7 +1188,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1272,7 +1272,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1356,7 +1356,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1440,7 +1440,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1524,7 +1524,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1608,7 +1608,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1707,31 +1707,31 @@
 				Name="enc"
 				>
 				<File
-					RelativePath="..\..\..\lib\enc\analyze.c"
+					RelativePath="..\..\..\lib\analyze.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encapiwrapper.c"
+					RelativePath="..\..\..\lib\encapiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encfrag.c"
+					RelativePath="..\..\..\lib\encfrag.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encinfo.c"
+					RelativePath="..\..\..\lib\encinfo.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encint.h"
+					RelativePath="..\..\..\lib\encint.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encode.c"
+					RelativePath="..\..\..\lib\encode.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encoder_disabled.c"
+					RelativePath="..\..\..\lib\encoder_disabled.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -1799,62 +1799,62 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\enquant.c"
+					RelativePath="..\..\..\lib\enquant.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\enquant.h"
+					RelativePath="..\..\..\lib\enquant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\fdct.c"
+					RelativePath="..\..\..\lib\fdct.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\huffenc.c"
+					RelativePath="..\..\..\lib\huffenc.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\huffenc.h"
+					RelativePath="..\..\..\lib\huffenc.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mathops.c"
+					RelativePath="..\..\..\lib\mathops.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mathops.h"
+					RelativePath="..\..\..\lib\mathops.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mcenc.c"
+					RelativePath="..\..\..\lib\mcenc.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\modedec.h"
+					RelativePath="..\..\..\lib\modedec.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\rate.c"
+					RelativePath="..\..\..\lib\rate.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\tokenize.c"
+					RelativePath="..\..\..\lib\tokenize.c"
 					>
 				</File>
 				<Filter
 					Name="x86_vc"
 					>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\mmxencfrag.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxencfrag.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\mmxfdct.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxfdct.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\x86enc.c"
+						RelativePath="..\..\..\lib\x86_vc\x86enc.c"
 						>
 					</File>
 				</Filter>
@@ -1863,39 +1863,39 @@
 				Name="dec"
 				>
 				<File
-					RelativePath="..\..\..\lib\dec\apiwrapper.c"
+					RelativePath="..\..\..\lib\apiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\apiwrapper.h"
+					RelativePath="..\..\..\lib\apiwrapper.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\bitpack.c"
+					RelativePath="..\..\..\lib\bitpack.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\bitpack.h"
+					RelativePath="..\..\..\lib\bitpack.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dct.h"
+					RelativePath="..\..\..\lib\dct.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decapiwrapper.c"
+					RelativePath="..\..\..\lib\decapiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decinfo.c"
+					RelativePath="..\..\..\lib\decinfo.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decint.h"
+					RelativePath="..\..\..\lib\decint.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decode.c"
+					RelativePath="..\..\..\lib\decode.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2103,35 +2103,35 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dequant.c"
+					RelativePath="..\..\..\lib\dequant.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dequant.h"
+					RelativePath="..\..\..\lib\dequant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\enquant.h"
+					RelativePath="..\..\..\lib\enquant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\fragment.c"
+					RelativePath="..\..\..\lib\fragment.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffdec.c"
+					RelativePath="..\..\..\lib\huffdec.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffdec.h"
+					RelativePath="..\..\..\lib\huffdec.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffman.h"
+					RelativePath="..\..\..\lib\huffman.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\idct.c"
+					RelativePath="..\..\..\lib\idct.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2339,23 +2339,23 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\idct.h"
+					RelativePath="..\..\..\lib\idct.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\info.c"
+					RelativePath="..\..\..\lib\info.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\internal.c"
+					RelativePath="..\..\..\lib\internal.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\ocintrin.h"
+					RelativePath="..\..\..\lib\ocintrin.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\quant.c"
+					RelativePath="..\..\..\lib\quant.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2563,30 +2563,30 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\quant.h"
+					RelativePath="..\..\..\lib\quant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\state.c"
+					RelativePath="..\..\..\lib\state.c"
 					>
 				</File>
 				<Filter
 					Name="x86_vc"
 					>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxfrag.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxfrag.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxidct.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxidct.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxstate.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxstate.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\x86state.c"
+						RelativePath="..\..\..\lib\x86_vc\x86state.c"
 						>
 					</File>
 				</Filter>

Modified: trunk/theora/win32/VS2008/libtheora/libtheora_static.vcproj
===================================================================
--- trunk/theora/win32/VS2008/libtheora/libtheora_static.vcproj	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/win32/VS2008/libtheora/libtheora_static.vcproj	2009-08-06 01:43:12 UTC (rev 16443)
@@ -54,7 +54,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;OC_X86_ASM"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -119,7 +119,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -187,7 +187,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;OC_X86_ASM"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -259,7 +259,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -330,7 +330,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -403,7 +403,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -474,7 +474,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -547,7 +547,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_BIND_TO_CURRENT_CRT_VERSION;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -616,7 +616,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -689,7 +689,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -762,7 +762,7 @@
 				Name="VCCLCompilerTool"
 				ExecutionBucket="7"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;DEBUG;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				MinimalRebuild="true"
 				RuntimeLibrary="1"
@@ -839,7 +839,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -920,7 +920,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1001,7 +1001,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1082,7 +1082,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1163,7 +1163,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1244,7 +1244,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1325,7 +1325,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1406,7 +1406,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1487,7 +1487,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\lib\enc;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\lib;..\..\..\..\libogg\include;..\..\..\..\..\..\..\core\ogg\libogg\include\"
 				PreprocessorDefinitions="_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS;_WIN32_WCE=$(CEVER);UNDER_CE;WINCE;$(ARCHFAM);$(_ARCHFAM_);$(PLATFORMDEFINES)"
 				StringPooling="true"
 				ExceptionHandling="0"
@@ -1579,31 +1579,31 @@
 				Name="enc"
 				>
 				<File
-					RelativePath="..\..\..\lib\enc\analyze.c"
+					RelativePath="..\..\..\lib\analyze.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encapiwrapper.c"
+					RelativePath="..\..\..\lib\encapiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encfrag.c"
+					RelativePath="..\..\..\lib\encfrag.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encinfo.c"
+					RelativePath="..\..\..\lib\encinfo.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encint.h"
+					RelativePath="..\..\..\lib\encint.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encode.c"
+					RelativePath="..\..\..\lib\encode.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\encoder_disabled.c"
+					RelativePath="..\..\..\lib\encoder_disabled.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -1671,62 +1671,62 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\enquant.c"
+					RelativePath="..\..\..\lib\enquant.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\enquant.h"
+					RelativePath="..\..\..\lib\enquant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\fdct.c"
+					RelativePath="..\..\..\lib\fdct.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\huffenc.c"
+					RelativePath="..\..\..\lib\huffenc.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\huffenc.h"
+					RelativePath="..\..\..\lib\huffenc.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mathops.c"
+					RelativePath="..\..\..\lib\mathops.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mathops.h"
+					RelativePath="..\..\..\lib\mathops.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\mcenc.c"
+					RelativePath="..\..\..\lib\mcenc.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\modedec.h"
+					RelativePath="..\..\..\lib\modedec.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\rate.c"
+					RelativePath="..\..\..\lib\rate.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\enc\tokenize.c"
+					RelativePath="..\..\..\lib\tokenize.c"
 					>
 				</File>
 				<Filter
 					Name="x86_vc"
 					>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\mmxencfrag.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxencfrag.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\mmxfdct.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxfdct.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\enc\x86_vc\x86enc.c"
+						RelativePath="..\..\..\lib\x86_vc\x86enc.c"
 						>
 					</File>
 				</Filter>
@@ -1735,39 +1735,39 @@
 				Name="dec"
 				>
 				<File
-					RelativePath="..\..\..\lib\dec\apiwrapper.c"
+					RelativePath="..\..\..\lib\apiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\apiwrapper.h"
+					RelativePath="..\..\..\lib\apiwrapper.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\bitpack.c"
+					RelativePath="..\..\..\lib\bitpack.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\bitpack.h"
+					RelativePath="..\..\..\lib\bitpack.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dct.h"
+					RelativePath="..\..\..\lib\dct.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decapiwrapper.c"
+					RelativePath="..\..\..\lib\decapiwrapper.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decinfo.c"
+					RelativePath="..\..\..\lib\decinfo.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decint.h"
+					RelativePath="..\..\..\lib\decint.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\decode.c"
+					RelativePath="..\..\..\lib\decode.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -1975,35 +1975,35 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dequant.c"
+					RelativePath="..\..\..\lib\dequant.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\dequant.h"
+					RelativePath="..\..\..\lib\dequant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\enquant.h"
+					RelativePath="..\..\..\lib\enquant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\fragment.c"
+					RelativePath="..\..\..\lib\fragment.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffdec.c"
+					RelativePath="..\..\..\lib\huffdec.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffdec.h"
+					RelativePath="..\..\..\lib\huffdec.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\huffman.h"
+					RelativePath="..\..\..\lib\huffman.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\idct.c"
+					RelativePath="..\..\..\lib\idct.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2211,23 +2211,23 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\idct.h"
+					RelativePath="..\..\..\lib\idct.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\info.c"
+					RelativePath="..\..\..\lib\info.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\internal.c"
+					RelativePath="..\..\..\lib\internal.c"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\ocintrin.h"
+					RelativePath="..\..\..\lib\ocintrin.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\quant.c"
+					RelativePath="..\..\..\lib\quant.c"
 					>
 					<FileConfiguration
 						Name="Debug|Win32"
@@ -2435,30 +2435,30 @@
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\quant.h"
+					RelativePath="..\..\..\lib\quant.h"
 					>
 				</File>
 				<File
-					RelativePath="..\..\..\lib\dec\state.c"
+					RelativePath="..\..\..\lib\state.c"
 					>
 				</File>
 				<Filter
 					Name="x86_vc"
 					>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxfrag.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxfrag.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxidct.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxidct.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\mmxstate.c"
+						RelativePath="..\..\..\lib\x86_vc\mmxstate.c"
 						>
 					</File>
 					<File
-						RelativePath="..\..\..\lib\dec\x86_vc\x86state.c"
+						RelativePath="..\..\..\lib\x86_vc\x86state.c"
 						>
 					</File>
 				</Filter>

Modified: trunk/theora/win32/xmingw32/Makefile
===================================================================
--- trunk/theora/win32/xmingw32/Makefile	2009-08-06 00:38:30 UTC (rev 16442)
+++ trunk/theora/win32/xmingw32/Makefile	2009-08-06 01:43:12 UTC (rev 16443)
@@ -33,7 +33,8 @@
 # The compiler tools to use
 # There is no standard mingw prefix, so try to guess
 MINGW_PREFIX := $(or $(strip $(foreach exeprefix, \
-	i686-mingw32 i586-mingw32msvc i386-mingw32 no-mingw32, \
+	i686-mingw32 i686-pc-mingw32 i586-mingw32msvc i386-mingw32 \
+	no-mingw32, \
 	$(if $(shell which $(exeprefix)-gcc 2>/dev/null), $(exeprefix) ))))
 CC = $(MINGW_PREFIX)-gcc
 RC = $(MINGW_PREFIX)-windres
@@ -70,53 +71,53 @@
 # C source file lists
 
 LIBTHEORADEC_CSOURCES = \
-dec/apiwrapper.c \
-dec/bitpack.c \
-dec/decapiwrapper.c \
-dec/decinfo.c \
-dec/decode.c \
-dec/dequant.c \
-dec/fragment.c \
-dec/huffdec.c \
-dec/idct.c \
-dec/info.c \
-dec/internal.c \
-dec/quant.c \
-dec/state.c \
+apiwrapper.c \
+bitpack.c \
+decapiwrapper.c \
+decinfo.c \
+decode.c \
+dequant.c \
+fragment.c \
+huffdec.c \
+idct.c \
+info.c \
+internal.c \
+quant.c \
+state.c \
 $(if $(findstring -DOC_X86_ASM,${CFLAGS}), \
-dec/x86/mmxidct.c \
-dec/x86/mmxfrag.c \
-dec/x86/mmxstate.c \
-dec/x86/x86state.c \
+x86/mmxidct.c \
+x86/mmxfrag.c \
+x86/mmxstate.c \
+x86/x86state.c \
 )
 
 LIBTHEORAENC_CSOURCES = \
-dec/apiwrapper.c \
-dec/fragment.c \
-dec/idct.c \
-dec/internal.c \
-dec/state.c \
-dec/quant.c \
-enc/analyze.c \
-enc/fdct.c \
-enc/encfrag.c \
-enc/encapiwrapper.c \
-enc/encinfo.c \
-enc/encode.c \
-enc/enquant.c \
-enc/huffenc.c \
-enc/mathops.c \
-enc/mcenc.c \
-enc/rate.c \
-enc/tokenize.c \
+apiwrapper.c \
+fragment.c \
+idct.c \
+internal.c \
+state.c \
+quant.c \
+analyze.c \
+fdct.c \
+encfrag.c \
+encapiwrapper.c \
+encinfo.c \
+encode.c \
+enquant.c \
+huffenc.c \
+mathops.c \
+mcenc.c \
+rate.c \
+tokenize.c \
 $(if $(findstring -DOC_X86_ASM,${CFLAGS}), \
-dec/x86/mmxfrag.c \
-dec/x86/mmxidct.c \
-dec/x86/mmxstate.c \
-dec/x86/x86state.c \
-enc/x86/mmxencfrag.c \
-enc/x86/mmxfdct.c \
-enc/x86/x86enc.c \
+x86/mmxfrag.c \
+x86/mmxidct.c \
+x86/mmxstate.c \
+x86/x86state.c \
+x86/mmxencfrag.c \
+x86/mmxfdct.c \
+x86/x86enc.c \
 )
 
 
@@ -344,10 +345,7 @@
 # Remove all targets.
 clean:
 	-rm $(sort ${ALL_OBJS} ${ALL_DEPS} ${ALL_TARGETS} ${IMPLIB_TARGETS})
-	-rmdir ${WORKDIR}/enc/x86
-	-rmdir ${WORKDIR}/enc
-	-rmdir ${WORKDIR}/dec/x86
-	-rmdir ${WORKDIR}/dec
+	-rmdir ${WORKDIR}/x86
 	-rmdir ${WORKDIR}
 
 # Make everything depend on changes in the Makefile



More information about the commits mailing list