[xiph-commits] r10030 - in experimental/derf/theora-exp: examples
include/theora lib unix win32/msvc60
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Sat Sep 17 17:58:25 PDT 2005
Author: tterribe
Date: 2005-09-17 17:58:06 -0700 (Sat, 17 Sep 2005)
New Revision: 10030
Added:
experimental/derf/theora-exp/lib/encmsc.c
experimental/derf/theora-exp/lib/encvbr.c
experimental/derf/theora-exp/lib/encvbr.h
Modified:
experimental/derf/theora-exp/examples/dump_video.c
experimental/derf/theora-exp/include/theora/codec.h
experimental/derf/theora-exp/include/theora/theoraenc.h
experimental/derf/theora-exp/lib/bitrate.c
experimental/derf/theora-exp/lib/decode.c
experimental/derf/theora-exp/lib/encint.h
experimental/derf/theora-exp/lib/encode.c
experimental/derf/theora-exp/lib/fdct.c
experimental/derf/theora-exp/lib/fdct.h
experimental/derf/theora-exp/lib/huffdec.c
experimental/derf/theora-exp/lib/impmap.c
experimental/derf/theora-exp/lib/mcenc.c
experimental/derf/theora-exp/lib/ocintrin.h
experimental/derf/theora-exp/lib/psych.c
experimental/derf/theora-exp/lib/psych.h
experimental/derf/theora-exp/unix/Makefile
experimental/derf/theora-exp/win32/msvc60/dump_video.dsp
experimental/derf/theora-exp/win32/msvc60/encoder_example.dsp
experimental/derf/theora-exp/win32/msvc60/theorabase_static.dsp
experimental/derf/theora-exp/win32/msvc60/theoradec_static.dsp
experimental/derf/theora-exp/win32/msvc60/theoraenc_static.dsp
Log:
Encoder architecture improvements.
- Reconfigurable encoder pipeline elements have been added.
Still more work to be done on actually interleaving operations,
but the framework is there.
- A theora_encode_ctl() API for setting different encoding modes.
More modes need to be added (e.g., CBR)
- All VBR-specific code has been broken into its own module.
This gives it a cleaner separation from the packet assembly,
etc., code, and lets you see what parts need to be
re-implemented for a new encoding mode.
No new encoding modes have actually been added yet, and the VBR
mode is still not usable, though two small bug fixes are
included: the per-block QI values for I frames were being set
before the quantizers for the frame were actually chosen, and
the lowest contrast level was always being used in psych.c.
dump_video.c has also been fixed to be C90 compliant again.
In addition there were some minor documentation clean-ups, and
with my laptop dead I took the opportunity to update the Win32
project files.
Modified: experimental/derf/theora-exp/examples/dump_video.c
===================================================================
--- experimental/derf/theora-exp/examples/dump_video.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/examples/dump_video.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -174,10 +174,11 @@
int long_option_index;
int c;
+ int frames = 0;
+
FILE *infile = stdin;
outfile = stdout;
- int frames = 0;
#ifdef _WIN32 /* We need to set stdin/stdout to binary mode. Damn windows. */
/* Beware the evil ifdef. We avoid these where we can, but this one we
@@ -345,7 +346,7 @@
if(theora_decode_packetin(td,&op,&videobuf_granulepos)>=0){
videobuf_time=theora_granule_time(td,videobuf_granulepos);
videobuf_ready=1;
- frames++;
+ frames++;
}
}else
Modified: experimental/derf/theora-exp/include/theora/codec.h
===================================================================
--- experimental/derf/theora-exp/include/theora/codec.h 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/include/theora/codec.h 2005-09-18 00:58:06 UTC (rev 10030)
@@ -151,6 +151,8 @@
* the initial 'info' header packet.
* To initialize an encoder, the application fills in this structure and
* passes it to theora_encode_alloc().
+ * A default encoding mode is chosen based on the values of the #quality and
+ * #target_bitrate fields.
* On decode, it is filled in by theora_decode_headerin(), and then passed to
* theora_decode_alloc().
*
@@ -228,12 +230,18 @@
theora_colorspace colorspace;
/**The pixel format.*/
theora_pixel_fmt pixel_fmt;
- /**The target bit-rate in bits per second. */
+ /**The target bit-rate in bits per second.
+ If initializing an encoder with this struct, set this field to a non-zero
+ value to activate CBR encoding by default.*/
/*TODO: Current encoder does not support CBR mode, or anything like it.
We also don't really know what nominal rate each quality level
corresponds to yet.*/
int target_bitrate;
- /**The target quality level.*/
+ /**The target quality level.
+ Valid values range from 0 to 63, inclusive, with higher values giving
+ higher quality.
+ If initializing an encoder with this struct, and #target_bitrate is set
+ to zero, VBR encoding at this quality will be activated by default.*/
/*Currently this is set so that a qi of 0 corresponds to distortions of 24
times the JND, and each increase by 16 halves that value.
This gives us fine discrimination at low qualities, yet effective rate
Modified: experimental/derf/theora-exp/include/theora/theoraenc.h
===================================================================
--- experimental/derf/theora-exp/include/theora/theoraenc.h 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/include/theora/theoraenc.h 2005-09-18 00:58:06 UTC (rev 10030)
@@ -29,6 +29,53 @@
+/**The configuration information for the VBR encoding mode.
+ * This mode attempts to encode the video with a constant psychovisual
+ * quality.
+ * It can be enabled by calling theora_encode_ctl() with #OC_ENCCTL_SETUP_VBR.
+ * See the #theora_info struct documentation for details on how the default
+ * encoding mode is chosen.*/
+typedef struct theora_vbr_cfg{
+ /**The target quality index.
+ * Valid values range from 0 to 63, inclusive, with higher values giving
+ * higher quality.
+ * Note that, in this case, this corresponds to a <em>perceptual</em>
+ * quality, and does not translate directly into a quantization setting.
+ * Limits on the admissible quantizers can be controlled below.*/
+ int qi;
+ /**The minimum quality to use for a keyframe.
+ * Valid values range from 0 to 63, inclusive, with higher values giving
+ * higher quality.*/
+ int kf_qi_min;
+ /**The maximum quality to use for a keyframe.
+ * Valid values range from 0 to 63, inclusive, with higher values giving
+ * higher quality.
+ * This must be at least as large as #kf_qi_min.*/
+ int kf_qi_max;
+ /**The minimum quality to use for a delta frame.
+ * Valid values range from 0 to 63, inclusive, with higher values giving
+ * higher quality.*/
+ int df_qi_min;
+ /**The maximum quality to use for a delta frame.
+ * Valid values range from 0 to 63, inclusive, with higher values giving
+ * higher quality.
+ * This must be at least as large as #df_qi_min.*/
+ int df_qi_max;
+}theora_vbr_cfg;
+
+/**The configuration information for the constant QI encoding mode.
+ * This mode encodes the video with a constant quality index.
+ * This is the fastest encoding mode.
+ * It can be enabled by calling theora_encode_ctl() with #OC_ENCCTL_SETUP_CQI.
+ * See the #theora_info struct documentation for details on how the default
+ * encoding mode is chosen.*/
+typedef struct theora_cqi_cfg{
+ /**The target quality index.
+ Valid values range from 0 to 63, inclusive, with higher values giving
+ higher quality.*/
+ int qi;
+}theora_cqi_cfg;
+
/**\name theora_encode_ctl() codes
* \anchor encctlcodes
* These are the available request codes for theora_encode_ctl().
@@ -41,7 +88,7 @@
* this call.
* <tt>NULL</tt> may be specified to revert to the default tables.
*
- * \param[in] _buf <tt>theora_huff_code[#OC_NHUFFMAN_TABLES][#OC_NDCT_TOKENS]</tt>
+ * \param[in] _buf <tt>#theora_huff_code[#OC_NHUFFMAN_TABLES][#OC_NDCT_TOKENS]</tt>
* \retval OC_FAULT \a _enc_ctx is <tt>NULL</tt>.
* \retval OC_EINVAL Encoding has already begun or one or more of the given
* tables is not full or prefix-free, \a _buf is
@@ -88,7 +135,7 @@
* when any of the luma blocks in a macro block are not coded.
* It also includes using the VP3 quantization tables and Huffman codes; if you
* set them explicitly after calling this function, the resulting stream will
- * not by VP3-compatible.
+ * not be VP3-compatible.
* If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source
* material, or when using a picture region smaller than the full frame (e.g.
* a non-multiple-of-16 width or height), then non-VP3 bitstream features will
@@ -102,7 +149,7 @@
* or 0 to disable it (the default).
* \param[out] _buf <tt>int</tt>: 1 if all bitstream features required for
* VP3-compatibility could be set, and 0 otherwise.
- * The latter will be returned if with pixel format is not
+ * The latter will be returned if the pixel format is not
* 4:2:0, the picture region is smaller than the full frame,
* or if encoding has begun, preventing the quantization
* tables and codebooks from being set.
@@ -110,6 +157,69 @@
* \retval OC_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
* \retval OC_IMPL Not supported by this implementation.*/
#define OC_ENCCTL_SET_VP3_COMPATIBLE (10)
+/**Gets the maximum speed level.
+ * Higher speed levels favor quicker encoding over better quality per bit.
+ * Depending on the encoding mode, and the internal algorithms used, quality
+ * may actually improve, but bitrate will also increase, and overall
+ * rate/distortion performance will likely decrease.
+ * The maximum value, and the meaning of each value, may change depending on
+ * the current encoding mode (VBR vs. CQI, etc.).
+ *
+ * \param[out] _buf int: The maximum encoding speed level.
+ * \retval OC_FAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval OC_IMPL Not supported by this implementation in the current
+ * encoding mode.*/
+#define OC_ENCCTL_GET_SPLEVEL_MAX (12)
+/**Sets the speed level.
+ * By default, the slowest speed (0) is used.
+ *
+ * \param[in] _buf int: The new encoding speed level.
+ * 0 is slowest, larger values use less CPU.
+ * \retval OC_FAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
+ * encoding speed level is out of bounds.
+ * The maximum encoding speed level may be
+ * implementation- and encoding mode-specific, and can be
+ * obtained via #OC_ENCCTL_GET_SPLEVEL_MAX.
+ * \retval OC_IMPL Not supported by this implementation in the current
+ * encoding mode.*/
+#define OC_ENCCTL_SET_SPLEVEL (14)
+/**Puts the encoder in VBR mode.
+ * This can be done at any time during the encoding process, with different
+ * configuration parameters, to encode different regions of the video segment
+ * with different qualities.
+ * See the #theora_info struct documentation for details on how the default
+ * encoding mode is chosen.
+ *
+ * \param[in] _buf <tt>#theora_vbr_cfg</tt>: the configuration parameters.
+ * This may be <tt>NULL</tt>, in which case the current VBR
+ * configuration is unchanged.
+ * The default is to use the QI setting passed in via the
+ * #theora_info struct when the encoder was initialized, with
+ * a full range of admissible quantizers.
+ * \retval OC_FAULT \a _enc_ctx is <tt>NULL</tt>.
+ * \retval OC_EINVAL The configuration parameters do not meet one of their
+ * stated requirements, \a _buf is <tt>NULL</tt> and
+ * \a _buf_sz is not zero, or \a _buf is non-<tt>NULL</tt>
+ * and \a _buf_sz is not <tt>sizeof(#theora_vbr_cfg)</tt>.
+ * \retval OC_IMPL Not supported by this implementation.*/
+#define OC_ENCCTL_SETUP_VBR (16)
+/**Puts the encoder in CQI mode.
+ * This can be done at any time during the encoding process, with different QI
+ * values.
+ * See the #theora_info struct documentation for details on how the default
+ * encoding mode is chosen.
+ *
+ * \param[in] _buf <tt>#theora_cqi_cfg</tt>: the configuration parameters.
+ * This may be <tt>NULL</tt>, in which case the current CQI
+ * configuration is unchanged.
+ * The default is to use the QI setting passed in via the
+ * #theora_info struct when the encoder was initialized.
+ * \retval OC_FAULT \a _enc_ctx is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a _buf_sz is not <tt>sizeof(#theora_cqi_cfg)</tt>.
+ * \retval OC_IMPL Not supported by this implementation.*/
+#define OC_ENCCTL_SETUP_CQI (18)
/*@}*/
Modified: experimental/derf/theora-exp/lib/bitrate.c
===================================================================
--- experimental/derf/theora-exp/lib/bitrate.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/bitrate.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -8197,9 +8197,13 @@
}
};
+ogg_uint16_t OC_RES_DISTORTS[64][3][OC_NMODES][16];
+
#if defined(OC_BITRATE_STATS)
static ogg_int64_t OC_RES_BITRATE_ACCUM[64][3][OC_NMODES][16];
static int OC_RES_BITRATE_SAMPLES[64][3][OC_NMODES][16];
+static ogg_int64_t OC_RES_DISTORT_ACCUM[64][3][OC_NMODES][16];
+static ogg_int64_t OC_RES_DISTORT_SAMPLES[64][3][OC_NMODES][16];
#include <stdio.h>
@@ -8209,6 +8213,8 @@
if(in==NULL)return;
fread(OC_RES_BITRATE_ACCUM,sizeof(OC_RES_BITRATE_ACCUM),1,in);
fread(OC_RES_BITRATE_SAMPLES,sizeof(OC_RES_BITRATE_SAMPLES),1,in);
+ fread(OC_RES_DISTORT_ACCUM,sizeof(OC_RES_DISTORT_ACCUM),1,in);
+ fread(OC_RES_DISTORT_SAMPLES,sizeof(OC_RES_DISTORT_SAMPLES),1,in);
/*Update the current bitrate statistics in use.*/
{
int qi;
@@ -8221,9 +8227,16 @@
for(erri=0;erri<16;erri++){
int n;
n=OC_RES_BITRATE_SAMPLES[qi][pli][modei][erri];
- if(!n)continue;
- OC_RES_BITRATES[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
- ((OC_RES_BITRATE_ACCUM[qi][pli][modei][erri]<<1)+n)/(n<<1));
+ if(n!=0){
+ OC_RES_BITRATES[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
+ ((OC_RES_BITRATE_ACCUM[qi][pli][modei][erri]<<1)+n)/(n<<1));
+ }
+ n=OC_RES_DISTORT_SAMPLES[qi][pli][modei][erri];
+ if(n!=0){
+ OC_RES_DISTORTS[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
+ ((OC_RES_DISTORT_ACCUM[qi][pli][modei][erri]<<OC_DIS_SCALE+1)+n)/
+ (n<<1));
+ }
}
}
}
@@ -8236,6 +8249,8 @@
out=fopen("modedec.stats","wb");
fwrite(OC_RES_BITRATE_ACCUM,sizeof(OC_RES_BITRATE_ACCUM),1,out);
fwrite(OC_RES_BITRATE_SAMPLES,sizeof(OC_RES_BITRATE_SAMPLES),1,out);
+ fwrite(OC_RES_DISTORT_ACCUM,sizeof(OC_RES_DISTORT_ACCUM),1,out);
+ fwrite(OC_RES_DISTORT_SAMPLES,sizeof(OC_RES_DISTORT_SAMPLES),1,out);
}
void oc_bitrate_update_stats(oc_enc_ctx *_enc,int _huff_idxs[5][3]){
@@ -8251,6 +8266,7 @@
};
ogg_uint32_t eob_bits[64];
int eob_runs[64];
+ int ref_idx;
int pli;
int *coded_fragi;
int *coded_fragi_end;
@@ -8330,8 +8346,13 @@
bit count per block.*/
/*Go through the actual encoded tokens and assign the bits used by each to
the fragment(s) they came from.*/
+ ref_idx=_enc->state.ref_frame_idx[OC_FRAME_SELF];
coded_fragi_end=coded_fragi=_enc->state.coded_fragis;
for(pli=0;pli<3;pli++){
+ int ref_ystride;
+ int cur_ystride;
+ cur_ystride=_enc->state.input[pli].ystride;
+ ref_ystride=_enc->state.ref_frame_bufs[ref_idx][pli].ystride;
coded_fragi_end+=_enc->state.ncoded_fragis[pli];
for(;coded_fragi<coded_fragi_end;coded_fragi++){
oc_fragment *frag;
@@ -8375,10 +8396,43 @@
}
err_bin=efrag->eerror>>(frag->mbmode==OC_MODE_INTRA?8:6);
err_bin=OC_MINI(15,err_bin);
- OC_RES_BITRATE_ACCUM[
- frag->invalid?0:frag->qi][pli][frag->mbmode][err_bin]+=frag_bits;
- OC_RES_BITRATE_SAMPLES[
- frag->invalid?0:frag->qi][pli][frag->mbmode][err_bin]++;
+ if(!frag->invalid){
+ unsigned char *cur;
+ unsigned char *ref;
+ int err;
+ int i;
+ int j;
+ OC_RES_BITRATE_ACCUM[frag->qi][pli][frag->mbmode][err_bin]+=frag_bits;
+ OC_RES_BITRATE_SAMPLES[frag->qi][pli][frag->mbmode][err_bin]++;
+ err=0;
+ cur=frag->buffer[OC_FRAME_IO];
+ ref=frag->buffer[ref_idx];
+ if(frag->border!=NULL){
+ ogg_int64_t mask;
+ mask=frag->border->mask;
+ for(i=0;i<8;i++){
+ for(j=0;j<8;j++){
+ if(mask&1){
+ err+=abs(cur[j]-ref[j]);
+ }
+ mask>>=1;
+ }
+ cur+=cur_ystride;
+ ref+=ref_ystride;
+ }
+ OC_RES_DISTORT_SAMPLES[frag->qi][pli][frag->mbmode][err_bin]+=
+ frag->border->npixels;
+ }
+ else{
+ for(i=0;i<8;i++){
+ for(j=0;j<8;j++)err+=abs(cur[j]-ref[j]);
+ cur+=cur_ystride;
+ ref+=ref_ystride;
+ }
+ OC_RES_DISTORT_SAMPLES[frag->qi][pli][frag->mbmode][err_bin]+=64;
+ }
+ OC_RES_DISTORT_ACCUM[frag->qi][pli][frag->mbmode][err_bin]+=err;
+ }
}
}
}
@@ -8393,9 +8447,16 @@
for(erri=0;erri<16;erri++){
int n;
n=OC_RES_BITRATE_SAMPLES[qi][pli][modei][erri];
- if(!n)continue;
- OC_RES_BITRATES[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
- ((OC_RES_BITRATE_ACCUM[qi][pli][modei][erri]<<1)+n)/(n<<1));
+ if(n!=0){
+ OC_RES_BITRATES[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
+ ((OC_RES_BITRATE_ACCUM[qi][pli][modei][erri]<<1)+n)/(n<<1));
+ }
+ n=OC_RES_DISTORT_SAMPLES[qi][pli][modei][erri];
+ if(n!=0){
+ OC_RES_DISTORTS[qi][pli][modei][erri]=(ogg_uint16_t)OC_MINI(65535,
+ ((OC_RES_DISTORT_ACCUM[qi][pli][modei][erri]<<OC_DIS_SCALE+1)+n)/
+ (n<<1));
+ }
}
}
}
Modified: experimental/derf/theora-exp/lib/decode.c
===================================================================
--- experimental/derf/theora-exp/lib/decode.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/decode.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -1260,7 +1260,7 @@
size_t frame_sz;
frame_sz=_dec->state.info.frame_width*_dec->state.info.frame_height;
if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
- _dec->variances=(ogg_uint32_t *)_ogg_realloc(_dec->variances,
+ _dec->variances=(int *)_ogg_realloc(_dec->variances,
_dec->state.fplanes[0].nfrags*sizeof(_dec->variances[0]));
_dec->pp_frame_data=(unsigned char *)_ogg_realloc(
_dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0]));
@@ -1474,6 +1474,8 @@
zzi=OC_MINI(zzi,64);
dct_coeffs[0]=(ogg_int16_t)frag->dc;
iquants=_dec->state.dequant_tables[frag->mbmode!=OC_MODE_INTRA][_pli];
+ /*last_zzi is always initialized.
+ If your compiler thinks otherwise, it is dumb.*/
oc_state_frag_recon(&_dec->state,frag,_pli,dct_coeffs,last_zzi,zzi,
iquants[_dec->state.qis[0]][0],iquants[frag->qi]);
}
@@ -2103,13 +2105,13 @@
_dec->state.curframe_num++;
if(_granpos!=NULL)*_granpos=_dec->state.granpos;
}
+#if defined(OC_DUMP_IMAGES)
+ oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
+#endif
return 0;
}
int theora_decode_ycbcr_out(theora_dec_ctx *_dec,theora_ycbcr_buffer _ycbcr){
oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
-#if defined(OC_DUMP_IMAGES)
- oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
-#endif
return 0;
}
Modified: experimental/derf/theora-exp/lib/encint.h
===================================================================
--- experimental/derf/theora-exp/lib/encint.h 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/encint.h 2005-09-18 00:58:06 UTC (rev 10030)
@@ -4,12 +4,12 @@
# include "theora/theoraenc.h"
# include "internal.h"
+typedef struct oc_enc_pipe_stage oc_enc_pipe_stage;
typedef struct oc_fragment_enc_info oc_fragment_enc_info;
typedef struct oc_mb_enc_info oc_mb_enc_info;
typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
-typedef struct oc_impmap_ctx oc_impmap_ctx;
+typedef struct oc_enc_vbr_ctx oc_enc_vbr_ctx;
typedef struct oc_mcenc_ctx oc_mcenc_ctx;
-typedef struct oc_psych_ctx oc_psych_ctx;
typedef struct theora_enc_ctx oc_enc_ctx;
# include "fdct.h"
@@ -18,6 +18,22 @@
#define OC_1_LN2 (1.4426950408889634073F)
+/*Encoding modes.*/
+#define OC_ENC_MODE_VBR (0)
+#define OC_ENC_MODE_CQI (1)
+/*
+Not yet implemented:
+#define OC_ENC_MODE_CBR (2)
+#define OC_ENC_MODE_RDO (3)
+*/
+
+/*The function used to set the speed for the current encoding mode.
+ _speed: The encoding speed to use.
+ Higher values should provide faster encoding, at reduced
+ rate-distortion performance.
+ This will always be in the range [0..._enc->speed_max].*/
+typedef void (*oc_enc_set_speed_func)(oc_enc_ctx *_enc,int _speed);
+
/*Constants for the packet-out state machine specific to the encoder.*/
/*Next packet to emit: Data packet, but none are ready yet.*/
@@ -27,6 +43,32 @@
+/*An encoder pipeline stage.*/
+struct oc_enc_pipe_stage{
+ /*The encoder this pipeline stage belongs to.*/
+ oc_enc_ctx *enc;
+ /*The next stage in the pipeline.*/
+ oc_enc_pipe_stage *next;
+ /*The number of rows processed so far in each plane.*/
+ int y_procd[3];
+ /*Called before processing the first stripe.
+ This does not need to call the next stage's start function.
+ Return: 0 on success, or a negative value on error.*/
+ int (*pipe_start)(oc_enc_pipe_stage *_stage);
+ /*Called for each stripe as it becomes available.
+ This function is responsible for calling the next function in the chain.
+ It may do so in smaller or larger stripes than are passed to it, at its
+ discretion.
+ _y_avail: Rows 0 through _y_avail[pli] in plane pli will be available for
+ processing.
+ Return: 0 on success, or a negative value on error.*/
+ int (*pipe_proc)(oc_enc_pipe_stage *_stage,int _y_avail[3]);
+ /*Called after processing the last stripe.
+ This does not need to call the next stage's end function.
+ Return: 0 on success, or a negative value on error.*/
+ int (*pipe_end)(oc_enc_pipe_stage *_stage);
+};
+
/*Fragment information specific to encoding.*/
struct oc_fragment_enc_info{
/*The DCT coefficients for coding the fragment in intra mode.
@@ -107,6 +149,22 @@
struct theora_enc_ctx{
/*Shared encoder/decoder state.*/
oc_theora_state state;
+ /*The start of the encoder pipeline.*/
+ oc_enc_pipe_stage *pipe;
+ /*The maximum speed setting for the current encoding mode.*/
+ int speed_max;
+ /*The function used to set the speed level for the current encoding mode.*/
+ oc_enc_set_speed_func set_speed;
+ /*The INTRA fDCT pipe stage.*/
+ oc_enc_pipe_stage fdct_pipe;
+ /*The uncoded fragment copying pipe stage.*/
+ oc_enc_pipe_stage copy_pipe;
+ /*The loop filter pipe stage.*/
+ oc_enc_pipe_stage loop_pipe;
+ /*The border filling pipe stage.*/
+ oc_enc_pipe_stage fill_pipe;
+ /*The packet assembly pipe stage.*/
+ oc_enc_pipe_stage pack_pipe;
/*Whether or not packets are ready to be emitted.
This takes on negative values while there are remaining header packets to
be emitted, reaches 0 when the codec is ready for input, and goes to 1
@@ -123,36 +181,37 @@
int nblock_coded_flags;
/*Special buffer used for the coded fragment flags.*/
oggpack_buffer opb_coded_flags;
- /*The estimated bit cost of the current frame.*/
- int est_bits;
/*Encoder-specific fragment information.*/
oc_fragment_enc_info *frinfo;
/*Encoder-specific macro block information.*/
oc_mb_enc_info *mbinfo;
- /*Minimum psychovisual tolerance for the DC coefficients in each plane.*/
- unsigned dc_tol_mins[3];
+ /*Context information used to perform motion estimation.*/
+ oc_mcenc_ctx *mcenc;
+ /*Context information used for VBR encoding.*/
+ oc_enc_vbr_ctx *vbr;
/*The qi value lists selected for each potential frame type.*/
int qis[2][3];
/*The number of qi values in the list for each frame type.*/
int nqis[2];
+ /*The number of coded fragments.*/
+ int ncoded_frags;
+ /*The current uncoded_fragi index being copied to each plane.*/
+ int uncoded_fragii[3];
/*The macro-block mode scheme chooser.*/
oc_mode_scheme_chooser mode_scheme_chooser;
/*The motion vector scheme chosen.*/
int mv_scheme;
- /*Context information used to perform motion estimation.*/
- oc_mcenc_ctx *mcenc;
- /*Context information used to generate the importance map.*/
- oc_impmap_ctx *impmap;
- /*Context information used to generate low-level perceptual weightings.*/
- oc_psych_ctx *psych;
/*The maximum distance between keyframes.*/
ogg_uint32_t keyframe_frequency_force;
+ /*Whether or not VP3-compatibility is enabled.*/
+ int vp3_compatible;
+ /*Whether or not the loop filter is enabled.
+ This is determined each frame, based on the quantizer it is encoded with.*/
+ int loop_filter_enabled;
+ /*The bounding value array used for the loop filter.*/
+ int bounding_values[512];
/*The huffman tables in use.*/
theora_huff_code huff_codes[OC_NHUFFMAN_TABLES][OC_NDCT_TOKENS];
- /*The scale factor for the current quality setting.*/
- float qscale;
- /*Whether or not VP3-compatibility is enabled.*/
- int vp3_compatible;
/*The quantization parameters in use.*/
theora_quant_info qinfo;
/*Pointers to the quantization tables in use.*/
@@ -173,6 +232,28 @@
ogg_uint16_t **extra_bits;
};
+extern const int OC_MODE_SCHEMES[7][OC_NMODES];
+extern const int OC_DCT_VAL_CAT_SIZES[6];
+extern const int OC_DCT_VAL_CAT_SHIFTS[6];
+extern const int OC_MODE_HAS_MV[OC_NMODES];
+extern const theora_huff_code OC_MV_CODES[2][63];
+
+/*The number of fractional bits in bitrate statistics.*/
+#define OC_BIT_SCALE (7)
+/*The number of fractional bits in distortion statistics.*/
+#define OC_DIS_SCALE (9)
+
+/*Estimated bits needed to code a residual given the: quality index, color
+ plane, macro-block mode, and a SAD bin.
+ SAD values for a block are divided by 256 for INTRA mode and 64 for INTER
+ modes to find the appropriate bin.*/
+extern ogg_uint16_t OC_RES_BITRATES[64][3][OC_NMODES][16];
+
+#if defined(OC_BITRATE_STATS)
+void oc_bitrate_update_stats(oc_enc_ctx *_enc,int _huff_idxs[5][3]);
+#endif
+
+
int oc_sad8_fullpel(const unsigned char *_cur,int _cur_stride,
const unsigned char *_ref,int _ref_stride);
int oc_sad8_fullpel_border(const unsigned char *_cur,int _cur_stride,
@@ -183,34 +264,33 @@
const unsigned char *_ref0,const unsigned char *_ref1,int _ref_stride,
ogg_int64_t _mask);
-void oc_enc_frag_intra_fdct(oc_enc_ctx *_enc,const oc_fragment *_frag,
- ogg_int16_t _dct_vals[64],int _ystride,int _framei);
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser);
+void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser);
+int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,int _mode);
+void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mode);
+void oc_mode_scheme_chooser_add(oc_mode_scheme_chooser *_chooser,
+ int _mode_counts[OC_NMODES]);
oc_mcenc_ctx *oc_mcenc_alloc(oc_enc_ctx *_enc);
void oc_mcenc_free(oc_mcenc_ctx *_mcenc);
-void oc_mcenc_analyze(oc_mcenc_ctx *_mcenc);
int oc_mcenc_search_1mv(oc_mcenc_ctx *_mcenc,int _mbi,int _frame);
+oc_enc_pipe_stage *oc_mcenc_prepend_to_pipe(oc_mcenc_ctx *_mcenc,
+ oc_enc_pipe_stage *_next);
-oc_impmap_ctx *oc_impmap_alloc(oc_enc_ctx *_enc);
-void oc_impmap_free(oc_impmap_ctx *_impmap);
-void oc_impmap_fill(oc_impmap_ctx *_impmap,float _duration);
+oc_enc_vbr_ctx *oc_enc_vbr_alloc(oc_enc_ctx *_enc);
+void oc_enc_vbr_free(oc_enc_vbr_ctx *_vbr);
+int oc_enc_vbr_enable(oc_enc_vbr_ctx *_vbr,theora_vbr_cfg *_cfg);
-oc_psych_ctx *oc_psych_alloc(oc_enc_ctx *_enc);
-void oc_psych_free(oc_psych_ctx *_psych);
-void oc_psych_scan(oc_psych_ctx *_psych,float _contrast);
+void oc_enc_set_speed_null(oc_enc_ctx *_enc,int _speed);
+void oc_enc_frag_intra_fdct(oc_enc_ctx *_enc,const oc_fragment *_frag,
+ ogg_int16_t _dct_vals[64],int _ystride,int _framei);
+int oc_enc_frag_sad(oc_enc_ctx *_enc,oc_fragment *_frag,int _dx,
+ int _dy,int _pli,int _frame);
+int oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb);
+int oc_enc_coded_sb_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb);
+int oc_enc_coded_block_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb);
+void oc_enc_do_inter_dcts(oc_enc_ctx *_enc);
+void oc_enc_merge_eob_runs(oc_enc_ctx *_enc);
-/*The number of fractional bits in bitrate statistics.*/
-#define OC_BIT_SCALE (7)
-
-/*Estimated bits needed to code a residual given the: quality index, color
- plane, macro-block mode, and a SAD bin.
- SAD values for a block are divided by 256 for INTRA mode and 64 for INTER
- modes to find the appropriate bin.*/
-extern ogg_uint16_t OC_RES_BITRATES[64][3][OC_NMODES][16];
-
-#if defined(OC_BITRATE_STATS)
-void oc_bitrate_update_stats(oc_enc_ctx *_enc,int _huff_idxs[5][3]);
#endif
-
-
-#endif
Added: experimental/derf/theora-exp/lib/encmsc.c
===================================================================
--- experimental/derf/theora-exp/lib/encmsc.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/encmsc.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -0,0 +1,234 @@
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ogg/ogg.h>
+#include "encint.h"
+
+
+
+/*The VLC code used for mode schemes 0-6.*/
+static const theora_huff_code OC_MODE_CODESA[OC_NMODES]={
+ {0x00,1},{0x02,2},{0x06,3},{0x0E,4},{0x1E,5},{0x3E,6},{0x7E,7},{0x7F,7}
+};
+
+/*The CLC code used for mode scheme 7.*/
+static const theora_huff_code OC_MODE_CODESB[OC_NMODES]={
+ {0x00,3},{0x01,3},{0x02,3},{0x03,3},{0x04,3},{0x05,3},{0x06,3},{0x07,3}
+};
+
+
+
+/*Initialize the mode scheme chooser.
+ This need only be called once per encoder.
+ This is probably the best place to describe the various schemes Theora uses
+ to encode macro block modes.
+ There are 8 possible schemes.
+ Schemes 0-6 use a highly unbalanced Huffman code to code each of the modes.
+ The same set of Huffman codes is used for each of these 7 schemes, but the
+ mode assigned to each code varies.
+ Schemes 1-6 have a fixed mapping from Huffman code to MB mode, while scheme
+ 0 writes a custom mapping to the bitstream before all the modes.
+ Finally, scheme 7 just encodes each mode directly in 3 bits.
+ Be warned that the number assigned to each mode is slightly different in the
+ bitstream than in this implementation, so a translation needs to be done.
+
+ Mode name: Source-code index; Bit-stream index:
+ OC_MODE_INTRA 0 1
+ OC_MODE_INTER_NOMV 1 0
+ OC_MODE_INTER_MV 2 2
+ OC_MODE_INTER_MV_LAST 3 3
+ OC_MODE_INTER_MV_LAST2 4 4
+ OC_MODE_INTER_MV_FOUR 5 6
+ OC_MODE_GOLDEN_NOMV 6 7
+ OC_MODE_GOLDEN_MV 7 5
+
+ The bit stream indices come from the constants assigned to each mode in the
+ original VP3 source.*/
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
+ int msi;
+ _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
+ for(msi=0;msi<7;msi++){
+ _chooser->mode_codes[msi]=OC_MODE_CODESA;
+ _chooser->mode_ranks[msi+1]=OC_MODE_SCHEMES[msi];
+ }
+ _chooser->mode_codes[7]=OC_MODE_CODESB;
+}
+
+/*Reset the mode scheme chooser.
+ This needs to be called once for each frame, including the first.*/
+void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
+ int i;
+ memset(_chooser->mode_counts,0,sizeof(_chooser->mode_counts));
+ /*Scheme 0 starts with 24 bits to store the mode list in.*/
+ _chooser->scheme_bits[0]=24;
+ memset(_chooser->scheme_bits+1,0,7*sizeof(_chooser->scheme_bits[1]));
+ for(i=0;i<8;i++){
+ /*Scheme 7 should always start first, and scheme 0 should always start
+ last.*/
+ _chooser->scheme_list[i]=7-i;
+ _chooser->scheme0_list[i]=_chooser->scheme0_ranks[i]=i;
+ }
+}
+
+/*This is the real purpose of this data structure: not actually selecting a
+ mode scheme, but estimating the cost of coding a given mode given all the
+ modes selected so far.
+ This is done via opportunity cost: the cost is defined as the number of bits
+ required to encode all the modes selected so far including the current one
+ using the best possible scheme, minus the number of bits required to encode
+ all the modes selected so far not including the current one using the best
+ possible scheme.
+ The computational expense of doing this probably makes it overkill.
+ Just be happy we take a greedy approach instead of trying to solve the
+ global mode-selection problem (which is NP-hard).
+ _mode: The mode to determine the cost of.
+ Return: The number of bits required to code this mode.*/
+int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,int _mode){
+ int scheme0;
+ int scheme1;
+ int si;
+ int scheme_bits;
+ int best_bits;
+ int mode_bits;
+ scheme0=_chooser->scheme_list[0];
+ scheme1=_chooser->scheme_list[1];
+ best_bits=_chooser->scheme_bits[scheme0];
+ mode_bits=_chooser->mode_codes[scheme0][
+ _chooser->mode_ranks[scheme0][_mode]].nbits;
+ /*Typical case: If the difference between the best scheme and the next best
+ is greater than 6 bits, then adding just one mode cannot change which
+ scheme we use.*/
+ if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits;
+ /*Otherwise, check to see if adding this mode selects a different scheme
+ as the best.*/
+ si=1;
+ best_bits+=mode_bits;
+ do{
+ scheme1=_chooser->scheme_list[si];
+ /*For any scheme except 0, we can just use the bit cost of the mode's rank
+ in that scheme.*/
+ if(scheme1!=0){
+ scheme_bits=_chooser->scheme_bits[scheme1]+
+ _chooser->mode_codes[scheme1][
+ _chooser->mode_ranks[scheme1][_mode]].nbits;
+ }
+ else{
+ int ri;
+ /*For scheme 0, incrementing the mode count could potentially change the
+ mode's rank.
+ Find the index where the mode would be moved to in the optimal list,
+ and use its bit cost instead of the one for the mode's current
+ position in the list.*/
+ for(ri=_chooser->scheme0_ranks[_mode];ri>0&&
+ _chooser->mode_counts[_mode]>=
+ _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--);
+ scheme_bits=_chooser->scheme_bits[0]+OC_MODE_CODESA[ri].nbits;
+ }
+ if(scheme_bits<best_bits)best_bits=scheme_bits;
+ si++;
+ }
+ while(si<8&&_chooser->scheme_bits[_chooser->scheme_list[si]]-
+ _chooser->scheme_bits[scheme0]<=6);
+ return best_bits-_chooser->scheme_bits[scheme0];
+}
+
+/*Update the mode counts and per-scheme bit counts and re-order the scheme
+ lists once a mode has been selected.
+ _mode: The mode that was chosen.*/
+void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mode){
+ int ri;
+ int si;
+ _chooser->mode_counts[_mode]++;
+ /*Re-order the scheme0 mode list if necessary.*/
+ for(ri=_chooser->scheme0_ranks[_mode];ri>0;ri--){
+ int pmode;
+ pmode=_chooser->scheme0_list[ri-1];
+ if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mode])break;
+ _chooser->scheme0_ranks[pmode]++;
+ _chooser->scheme0_list[ri]=pmode;
+ }
+ _chooser->scheme0_ranks[_mode]=ri;
+ _chooser->scheme0_list[ri]=_mode;
+ /*Now add the bit cost for the mode to each scheme.*/
+ for(si=0;si<8;si++){
+ _chooser->scheme_bits[si]+=
+ _chooser->mode_codes[si][_chooser->mode_ranks[si][_mode]].nbits;
+ }
+ /*Finally, re-order the list of schemes.*/
+ for(si=1;si<8;si++){
+ int sj;
+ int scheme0;
+ int bits0;
+ scheme0=_chooser->scheme_list[si];
+ bits0=_chooser->scheme_bits[scheme0];
+ sj=si;
+ do{
+ int scheme1;
+ scheme1=_chooser->scheme_list[sj-1];
+ if(bits0>=_chooser->scheme_bits[scheme1])break;
+ _chooser->scheme_list[sj]=scheme1;
+ }
+ while(--sj>0);
+ _chooser->scheme_list[sj]=scheme0;
+ }
+}
+
+/*Update the count for each mode by the given amounts, and then re-rank the
+ schemes appropriately.
+ This allows fewer (e.g. 1) updates to be done, at the cost of a more
+ expensive update.
+ _mode_counts: The amount to add to each mode count.*/
+void oc_mode_scheme_chooser_add(oc_mode_scheme_chooser *_chooser,
+ int _mode_counts[OC_NMODES]){
+ int mi;
+ int mj;
+ int ri;
+ int rj;
+ int si;
+ for(mi=0;mi<OC_NMODES;mi++){
+ _chooser->mode_counts[mi]+=_mode_counts[mi];
+ }
+ /*Re-order the scheme0 mode list if necessary.*/
+ for(ri=1;ri<OC_NMODES;ri++){
+ mi=_chooser->scheme0_list[ri];
+ rj=ri;
+ do{
+ mj=_chooser->scheme0_list[rj-1];
+ if(_chooser->mode_counts[mj]>=_chooser->mode_counts[mi])break;
+ _chooser->scheme0_ranks[mj]++;
+ _chooser->scheme0_list[rj]=mj;
+ }
+ while(--rj>0);
+ _chooser->scheme0_ranks[mi]=rj;
+ _chooser->scheme0_list[rj]=mi;
+ }
+ /*Now recompute the bit cost for each scheme.*/
+ for(si=0;si<8;si++){
+ _chooser->scheme_bits[si]=0;
+ for(mi=0;mi<8;mi++){
+ _chooser->scheme_bits[si]+=
+ _chooser->mode_codes[si][_chooser->mode_ranks[si][mi]].nbits*
+ _chooser->mode_counts[mi];
+ }
+ }
+ /*Scheme 0 starts with 24 bits to store the mode list in.*/
+ _chooser->scheme_bits[0]+=24;
+ /*Finally, re-order the list of schemes.*/
+ for(si=1;si<8;si++){
+ int sj;
+ int scheme0;
+ int bits0;
+ scheme0=_chooser->scheme_list[si];
+ bits0=_chooser->scheme_bits[scheme0];
+ sj=si;
+ do{
+ int scheme1;
+ scheme1=_chooser->scheme_list[sj-1];
+ if(bits0>=_chooser->scheme_bits[scheme1])break;
+ _chooser->scheme_list[sj]=scheme1;
+ }
+ while(--sj>0);
+ _chooser->scheme_list[sj]=scheme0;
+ }
+}
Modified: experimental/derf/theora-exp/lib/encode.c
===================================================================
--- experimental/derf/theora-exp/lib/encode.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/encode.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -9,7 +9,7 @@
/*The mode orderings for the various mode coding schemes.
Scheme 0 uses a custom alphabet, which is not stored in this table.*/
-static const int OC_MODE_SCHEMES[7][OC_NMODES]={
+const int OC_MODE_SCHEMES[7][OC_NMODES]={
/*Last MV dominates.*/
/*L P M N I G GM 4*/
{4,3,2,0,1,7,5,6},
@@ -29,12 +29,20 @@
{1,0,2,3,4,7,5,6}
};
+/*The number of different DCT coefficient values that can be stored by each
+ of the different DCT value category tokens.*/
+const int OC_DCT_VAL_CAT_SIZES[6]={2,4,8,16,32,512};
+
+/*The number of bits to shift the sign of the DCT coefficient over by for each
+ of the different DCT value category tokens.*/
+const int OC_DCT_VAL_CAT_SHIFTS[6]={1,2,3,4,5,9};
+
/*Whether or not each mode has a motion vector associated with it.
Otherwise, the mode is assumed to use the 0,0 vector.*/
-static const int OC_MODE_HAS_MV[OC_NMODES]={0,0,1,1,1,1,0,1};
+const int OC_MODE_HAS_MV[OC_NMODES]={0,0,1,1,1,1,0,1};
/*The Huffman codes used for motion vectors.*/
-static const theora_huff_code OC_MV_CODES[2][63]={
+const theora_huff_code OC_MV_CODES[2][63]={
/*Scheme 1: VLC code.*/
{
{0xFF,8},{0xFD,8},{0xFB,8},{0xF9,8},{0xF7,8},{0xF5,8},{0xF3,8},
@@ -65,22 +73,6 @@
-static int oc_mvbitsa(int _dx,int _dy){
- return OC_MV_CODES[0][_dx+31].nbits+OC_MV_CODES[0][_dy+31].nbits;
-}
-
-
-
-static const theora_huff_code OC_MODE_CODESA[OC_NMODES]={
- {0x00,1},{0x02,2},{0x06,3},{0x0E,4},{0x1E,5},{0x3E,6},{0x7E,7},{0x7F,7}
-};
-
-static const theora_huff_code OC_MODE_CODESB[OC_NMODES]={
- {0x00,3},{0x01,3},{0x02,3},{0x03,3},{0x04,3},{0x05,3},{0x06,3},{0x07,3}
-};
-
-
-
int oc_sad8_fullpel(const unsigned char *_cur,int _cur_ystride,
const unsigned char *_ref,int _ref_ystride){
int i;
@@ -211,165 +203,6 @@
-/*Initialize the mode scheme chooser.
- This need only be called once per encoder.
- This is probably the best place to describe the various scheme's Theora uses
- to encode macro block modes.
- There are 8 possible schemes.
- Schemes 0-6 use a highly unbalanced Huffman code to code each of the modes.
- The same set of Huffman codes is used for each of these 7 schemes, but the
- mode assigned to each code varies.
- Schemes 1-6 have a fixed mapping from Huffman code to MB mode, while scheme
- 0 writes a custom mapping to the bitstream before all the modes.
- Finally, scheme 7 just encodes each mode directly in 3 bits.
- Be warned that the number assigned to each mode is slightly different in the
- bitstream than in this implementation, so a translation needs to be done.
-
- Mode name: Source-code index; Bit-stream index:
- OC_MODE_INTRA 0 1
- OC_MODE_INTER_NOMV 1 0
- OC_MODE_INTER_MV 2 2
- OC_MODE_INTER_MV_LAST 3 3
- OC_MODE_INTER_MV_LAST2 4 4
- OC_MODE_INTER_MV_FOUR 5 6
- OC_MODE_GOLDEN_NOMV 6 7
- OC_MODE_GOLDEN_MV 7 5
-
- The bit stream indices come from the constants assigned to each mode in the
- original VP3 source.*/
-static void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
- int msi;
- _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
- for(msi=0;msi<7;msi++){
- _chooser->mode_codes[msi]=OC_MODE_CODESA;
- _chooser->mode_ranks[msi+1]=OC_MODE_SCHEMES[msi];
- }
- _chooser->mode_codes[7]=OC_MODE_CODESB;
-}
-
-/*Reset the mode scheme chooser.
- This needs to be called once for each frame, including the first.*/
-static void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
- int i;
- memset(_chooser->mode_counts,0,sizeof(_chooser->mode_counts));
- /*Scheme 0 starts with 24 bits to store the mode list in.*/
- _chooser->scheme_bits[0]=24;
- memset(_chooser->scheme_bits+1,0,7*sizeof(_chooser->scheme_bits[1]));
- for(i=0;i<8;i++){
- /*Scheme 7 should always start first, and scheme 0 should always start
- last.*/
- _chooser->scheme_list[i]=7-i;
- _chooser->scheme0_list[i]=_chooser->scheme0_ranks[i]=i;
- }
-}
-
-/*This is the real purpose of this data structure: not actually selecting a
- mode scheme, but estimating the cost of coding a given mode given all the
- modes selected so far.
- This is done via opportunity cost: the cost is defined as the number of bits
- required to encode all the modes selected so far including the current one
- using the best possible scheme, minus the number of bits required to encode
- all the modes selected so far not including the current one using the best
- possible scheme.
- The computational expense of doing this probably makes it overkill.
- Just be happy we take a greedy approach instead of trying to solve the
- global mode-selection problem (which is NP-hard).
- _mode: The mode to determine the cost of.
- Return: The number of bits required to code this mode.*/
-static int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,
- int _mode){
- int scheme0;
- int scheme1;
- int si;
- int scheme_bits;
- int best_bits;
- int mode_bits;
- scheme0=_chooser->scheme_list[0];
- scheme1=_chooser->scheme_list[1];
- best_bits=_chooser->scheme_bits[scheme0];
- mode_bits=_chooser->mode_codes[scheme0][
- _chooser->mode_ranks[scheme0][_mode]].nbits;
- /*Typical case: If the difference between the best scheme and the next best
- is greater than 6 bits, then adding just one mode cannot change which
- scheme we use.*/
- if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits;
- /*Otherwise, check to see if adding this mode selects a different scheme
- as the best.*/
- si=1;
- best_bits+=mode_bits;
- do{
- scheme1=_chooser->scheme_list[si];
- /*For any scheme except 0, we can just use the bit cost of the mode's rank
- in that scheme.*/
- if(scheme1!=0){
- scheme_bits=_chooser->scheme_bits[scheme1]+
- _chooser->mode_codes[scheme1][
- _chooser->mode_ranks[scheme1][_mode]].nbits;
- }
- else{
- int ri;
- /*For scheme 0, incrementing the mode count could potentially change the
- mode's rank.
- Find the index where the mode would be moved to in the optimal list,
- and use its bit cost instead of the one for the mode's current
- position in the list.*/
- for(ri=_chooser->scheme0_ranks[_mode];ri>0&&
- _chooser->mode_counts[_mode]>=
- _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--);
- scheme_bits=_chooser->scheme_bits[0]+OC_MODE_CODESA[ri].nbits;
- }
- if(scheme_bits<best_bits)best_bits=scheme_bits;
- si++;
- }
- while(si<8&&_chooser->scheme_bits[_chooser->scheme_list[si]]-
- _chooser->scheme_bits[scheme0]<=6);
- return best_bits-_chooser->scheme_bits[scheme0];
-}
-
-/*Update the mode counts and per-scheme bit counts and re-order the scheme
- lists once a mode has been selected.
- _mode: The mode that was chosen.*/
-static void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
- int _mode){
- int ri;
- int si;
- _chooser->mode_counts[_mode]++;
- /*Re-order the scheme0 mode list if necessary.*/
- for(ri=_chooser->scheme0_ranks[_mode];ri>0;ri--){
- int pmode;
- pmode=_chooser->scheme0_list[ri-1];
- if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mode])break;
- _chooser->scheme0_ranks[pmode]++;
- _chooser->scheme0_list[ri]=pmode;
- }
- _chooser->scheme0_ranks[_mode]=ri;
- _chooser->scheme0_list[ri]=_mode;
- /*Now add the bit cost for the mode to each scheme.*/
- for(si=0;si<8;si++){
- _chooser->scheme_bits[si]+=
- _chooser->mode_codes[si][_chooser->mode_ranks[si][_mode]].nbits;
- }
- /*Finally, re-order the list of schemes.*/
- for(si=1;si<8;si++){
- int sj;
- int scheme0;
- int bits0;
- scheme0=_chooser->scheme_list[si];
- bits0=_chooser->scheme_bits[scheme0];
- sj=si;
- do{
- int scheme1;
- scheme1=_chooser->scheme_list[sj-1];
- if(bits0>=_chooser->scheme_bits[scheme1])break;
- _chooser->scheme_list[sj]=scheme1;
- }
- while(--sj>0);
- _chooser->scheme_list[sj]=scheme0;
- }
-}
-
-
-
/*Initializes the macro block neighbor lists.
This assumes that the entire mbinfo memory region has been initialized with
zeros.
@@ -527,144 +360,6 @@
return 0;
}
-/*Select the set of quantizers to use for the current frame for each possible
- frame type (intra or inter).
- This does not assign a quantizer to each fragment, as that depends on the
- quantizer type used and thus is done during mode decision.*/
-static void oc_enc_quant_sel_quality(oc_enc_ctx *_enc,int _intra_only){
- unsigned qmax[2][3];
- int fti;
- int qti;
- int pli;
- int dc_qi[2];
- /*The first quantizer value is used for DC coefficients.
- Select one that allows us to meet our quality requirements.*/
- for(qti=0;qti<1+!_intra_only;qti++)for(pli=0;pli<3;pli++){
- qmax[qti][pli]=OC_MAXI(2U*_enc->dc_tol_mins[pli],OC_DC_QUANT_MIN[qti]);
- }
- /*For intra frames...(containing just INTRA fragments)*/
- for(dc_qi[0]=0;dc_qi[0]<63;dc_qi[0]++){
- if(_enc->state.dequant_tables[0][0][dc_qi[0]][0]<=qmax[0][0]&&
- _enc->state.dequant_tables[0][1][dc_qi[0]][0]<=qmax[0][1]&&
- _enc->state.dequant_tables[0][2][dc_qi[0]][0]<=qmax[0][2]){
- break;
- }
- }
- /*For inter frames...(containing both INTER and INTRA fragments)*/
- if(!_intra_only){
- for(dc_qi[1]=dc_qi[0];dc_qi[1]<63;dc_qi[1]++){
- if(_enc->state.dequant_tables[1][0][dc_qi[1]][0]<=qmax[1][0]&&
- _enc->state.dequant_tables[1][1][dc_qi[1]][0]<=qmax[1][1]&&
- _enc->state.dequant_tables[1][2][dc_qi[1]][0]<=qmax[1][2]){
- break;
- }
- }
- }
- /*Now we select a full qi list for each frame type.*/
- for(fti=0;fti<1+!_intra_only;fti++){
- oc_fragment_enc_info *efrag;
- int ncoded_fragis;
- int nqis[64];
- int qi;
- int qi0;
- int qi1;
- int qi2;
- /*Here we count up the number of fragments that can use each qi value.
- Unless we know this is an intra frame, we don't know what quantizer type
- will be used for each fragment, so we just count both of them.*/
- memset(nqis,0,sizeof(nqis));
- if(fti){
- int *coded_fragi;
- int *coded_fragi_end;
- coded_fragi=_enc->state.coded_fragis;
- ncoded_fragis=_enc->state.ncoded_fragis[0]+
- _enc->state.ncoded_fragis[1]+_enc->state.ncoded_fragis[2];
- coded_fragi_end=coded_fragi+ncoded_fragis;
- for(;coded_fragi<coded_fragi_end;coded_fragi++){
- efrag=_enc->frinfo+*coded_fragi;
- for(qti=0;qti<2;qti++)nqis[efrag->qi_min[qti]]++;
- }
- }
- else{
- oc_fragment_enc_info *efrag_end;
- ncoded_fragis=_enc->state.nfrags;
- efrag=_enc->frinfo;
- efrag_end=efrag+ncoded_fragis;
- for(;efrag<efrag_end;efrag++)nqis[efrag->qi_min[0]]++;
- }
- /*We'll now choose the qi values that divide the fragments into equally
- sized groups, or as close as we can make it.
- We account for the DC coefficients by adding an extra amount to the qi
- value they require.
- Since there are usually many more DC coefficients coded than any one AC
- coefficient, we use 1/8 of the number of fragments, instead of 1/64.*/
- nqis[dc_qi[fti]]+=(ncoded_fragis<<fti)+7>>3;
- /*Convert this into a moment table.*/
- for(qi=63;qi-->0;)nqis[qi]+=nqis[qi+1];
- for(qi0=64;qi0-->0&&nqis[qi0]<=0;);
- for(qi1=qi0-1;qi1>=0&&nqis[qi1]<=nqis[qi0];qi1--);
- /*Test to make sure there are even two unique quantizers.*/
- if(qi1>=0){
- ogg_int64_t best_metric;
- ogg_int64_t metric;
- int best_qi1;
- int best_qi2;
- int qii;
- for(qi2=qi1-1;qi2>=0&&nqis[qi2]<=nqis[qi1];qi2--);
- /*Test to make sure there are three unique quantizers.*/
- if(qi2>=0){
- best_metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
- (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
- best_qi1=qi1;
- best_qi2=qi2;
- for(;nqis[qi1]<nqis[1];qi1--){
- for(qi2=qi1-1;nqis[qi2]<nqis[0];qi2--){
- metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
- (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
- if(metric>=best_metric){
- best_qi1=qi1;
- best_qi2=qi2;
- best_metric=metric;
- }
- }
- }
- _enc->qis[fti][0]=qi0;
- _enc->qis[fti][1]=best_qi1;
- _enc->qis[fti][2]=best_qi2;
- _enc->nqis[fti]=3;
- }
- else{
- best_metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
- best_qi1=qi1;
- if(qi1>0)for(qi1--;nqis[qi1]<nqis[0];qi1--){
- metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
- if(metric>best_metric){
- best_qi1=qi1;
- best_metric=metric;
- }
- }
- _enc->qis[fti][0]=qi0;
- _enc->qis[fti][1]=best_qi1;
- _enc->nqis[fti]=2;
- }
- /*Right now qis[0] is the largest.
- We want to use the smallest that is still large enough for our DC
- coefficients.*/
- for(qii=1;qii<_enc->nqis[fti];qii++)if(_enc->qis[fti][qii]>=dc_qi[fti]){
- qi0=_enc->qis[fti][0];
- _enc->qis[fti][0]=_enc->qis[fti][qii];
- _enc->qis[fti][qii]=qi0;
- }
- }
- else{
- _enc->qis[fti][0]=qi0;
- _enc->nqis[fti]=1;
- }
- /*If we're in VP3 compatibility mode, just use the first quantizer.*/
- if(_enc->vp3_compatible)_enc->nqis[fti]=1;
- }
-}
-
static void oc_enc_frame_header_pack(oc_enc_ctx *_enc){
/*Mark this packet as a data packet.*/
oggpackB_write(&_enc->opb,0,1);
@@ -690,78 +385,6 @@
}
}
-/*Mark all fragments as coded and in OC_MODE_INTRA.
- This also selects a quantizer value for each fragment and builds up the
- coded fragment list (in coded order) and clears the uncoded fragment list.
- It does not update the coded macro block list, as that is not used when
- coding INTRA frames.*/
-static void oc_enc_mark_all_intra(oc_enc_ctx *_enc){
- oc_sb *sb;
- oc_sb *sb_end;
- int pli;
- int qii;
- int ncoded_fragis;
- int prev_ncoded_fragis;
- /*Select the quantizer list for INTRA frames.*/
- _enc->state.nqis=_enc->nqis[OC_INTRA_FRAME];
- for(qii=0;qii<_enc->state.nqis;qii++){
- _enc->state.qis[qii]=_enc->qis[OC_INTRA_FRAME][qii];
- }
- prev_ncoded_fragis=ncoded_fragis=0;
- sb=sb_end=_enc->state.sbs;
- for(pli=0;pli<3;pli++){
- const oc_fragment_plane *fplane;
- fplane=_enc->state.fplanes+pli;
- sb_end+=fplane->nsbs;
- for(;sb<sb_end;sb++){
- int quadi;
- for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
- int bi;
- for(bi=0;bi<4;bi++)if(sb->map[quadi][bi]>=0){
- oc_fragment_enc_info *efrag;
- oc_fragment *frag;
- int fragi;
- int best_qii;
- fragi=sb->map[quadi][bi];
- frag=_enc->state.frags+fragi;
- frag->coded=1;
- frag->mbmode=OC_MODE_INTRA;
- efrag=_enc->frinfo+fragi;
- best_qii=0;
- for(qii=1;qii<_enc->state.nqis;qii++){
- if(efrag->qi_min[0]<=_enc->state.qis[qii]&&
- (_enc->state.qis[best_qii]<efrag->qi_min[0]||
- _enc->state.qis[qii]<_enc->state.qis[best_qii])){
- best_qii=qii;
- }
- }
- efrag->qii=(unsigned char)best_qii;
- frag->qi=_enc->state.qis[best_qii];
- _enc->state.coded_fragis[ncoded_fragis++]=fragi;
-#if defined(OC_BITRATE_STATS)
- /*Compute the error function used for intra mode fragments.
- This function can only use information known at mode decision time, and
- so excludes the DC component.
- TODO: Separate this out somewhere more useful.*/
- {
- oc_fragment_enc_info *efrag;
- int ci;
- int eerror;
- efrag=_enc->frinfo+fragi;
- eerror=0;
- for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
- efrag->eerror=eerror;
- }
-#endif
- }
- }
- }
- _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
- prev_ncoded_fragis=ncoded_fragis;
- _enc->state.nuncoded_fragis[pli]=0;
- }
-}
-
static void oc_enc_block_qis_pack(oc_enc_ctx *_enc){
int *coded_fragi;
int *coded_fragi_end;
@@ -823,67 +446,6 @@
/*Performs an fDCT on a given fragment.
_frag: The fragment to perform the 2D DCT on.
_dct_vals: The output buffer for the DCT coefficients.
- _ystride: The Y stride of the plane the fragment belongs to.
- _framei: The picture buffer index to perform the DCT on.
- Use OC_FRAME_IO for the current input frame.*/
-void oc_enc_frag_intra_fdct(oc_enc_ctx *_enc,const oc_fragment *_frag,
- ogg_int16_t _dct_vals[64],int _ystride,int _framei){
- ogg_int16_t pix_buf[64];
- unsigned char *pixels;
- int pixi;
- int y;
- int x;
- /*NOTE: 128 is subtracted from each pixel value to make it signed.
- The original VP3 source claimed that, "this reduces the internal precision
- requirments [sic] in the DCT transform."
- This is of course not actually true.
- The transform must still support input in the range [-255,255] to code
- predicted fragments, since the same transform is used for both.
- This actually _reduces_ the precision of the results, because larger
- (absolute) values would have fewer significant bits chopped off when
- rounding.
- We're stuck with it, however.
- At least it might reduce bias towards 0 when coding unpredicted DC
- coefficients, but that's not what VP3 justified it with.*/
- pixels=_frag->buffer[_framei];
- /*For border fragments, only copy pixels that are in the displayable
- region of the image.
- The DCT function will compute optimal padding values for the other
- pixels.*/
- if(_frag->border!=NULL){
- ogg_int64_t mask;
- mask=_frag->border->mask;
- for(pixi=y=0;y<8;y++){
- for(x=0;x<8;x++,pixi++){
- pix_buf[pixi]=(ogg_int16_t)(((int)mask&1)?pixels[x]-128:0);
- /*This branchless code is (almost) equivalent to the previous line:
- int pmask;
- pmask=-(int)mask&1;
- pix_buf[pixi]=(ogg_int16_t)(pmask&pixels[x]);
- We don't use this code to allow the user to pass in a buffer that is
- the exact size of the displayed image, not the size padded to a
- multiple of 16.
- In the latter case, we might segfault on pixels[x] if it is not mapped
- to a valid page.*/
- mask>>=1;
- }
- pixels+=_ystride;
- }
- oc_fdct8x8_border(_frag->border,_dct_vals,pix_buf);
- }
- /*Otherwise, copy all the pixels in the fragment and do a normal DCT.*/
- else{
- for(pixi=y=0;y<8;y++){
- for(x=0;x<8;x++,pixi++)pix_buf[pixi]=(ogg_int16_t)(pixels[x]-128);
- pixels+=_ystride;
- }
- oc_fdct8x8(_dct_vals,pix_buf);
- }
-}
-
-/*Performs an fDCT on a given fragment.
- _frag: The fragment to perform the 2D DCT on.
- _dct_vals: The output buffer for the DCT coefficients.
_pli: The color plane the fragment belongs to.*/
static void oc_enc_frag_inter_fdct(oc_enc_ctx *_enc,const oc_fragment *_frag,
ogg_int16_t _dct_vals[64],int _pli){
@@ -962,466 +524,13 @@
}
}
-/*Computes the SAD value of a fragment in the input image with respect to its
- motion compensated predictor..
- _frag: The fragment to find the SAD of.
- _dx: The X component of the motion vector.
- _dy: The Y component of the motion vector.
- _pli: The color plane the fragment belongs to.
- _frame: The reference frame to predict from.*/
-static int oc_enc_frag_sad(oc_enc_ctx *_enc,oc_fragment *_frag,int _dx,
- int _dy,int _pli,int _frame){
- int cur_ystride;
- int ref_ystride;
- int ref_framei;
- int mvoffset0;
- int mvoffset1;
- cur_ystride=_enc->state.input[_pli].ystride;
- ref_framei=_enc->state.ref_frame_idx[_frame];
- ref_ystride=_enc->state.ref_frame_bufs[ref_framei][_pli].ystride;
- if(oc_state_get_mv_offsets(&_enc->state,&mvoffset0,&mvoffset1,_dx,_dy,
- ref_ystride,_pli)>1){
- if(_frag->border==NULL){
- return oc_sad8_halfpel(_frag->buffer[OC_FRAME_IO],cur_ystride,
- _frag->buffer[ref_framei]+mvoffset0,
- _frag->buffer[ref_framei]+mvoffset1,ref_ystride);
- }
- else{
- return oc_sad8_halfpel_border(_frag->buffer[OC_FRAME_IO],cur_ystride,
- _frag->buffer[ref_framei]+mvoffset0,
- _frag->buffer[ref_framei]+mvoffset1,ref_ystride,_frag->border->mask);
- }
- }
- else{
- if(_frag->border==NULL){
- return oc_sad8_fullpel(_frag->buffer[OC_FRAME_IO],cur_ystride,
- _frag->buffer[ref_framei]+mvoffset0,ref_ystride);
- }
- else{
- return oc_sad8_fullpel_border(_frag->buffer[OC_FRAME_IO],
- cur_ystride,_frag->buffer[ref_framei]+mvoffset0,ref_ystride,
- _frag->border->mask);
- }
- }
-}
-
-
-
-/*The number of different DCT coefficient values that can be stored by each
- of the different DCT value category tokens.*/
-static const int OC_DCT_VAL_CAT_SIZES[6]={2,4,8,16,32,512};
-/*The number of bits to shift the sign of the DCT coefficient over by for each
- of the different DCT value category tokens.*/
-static const int OC_DCT_VAL_CAT_SHIFTS[6]={1,2,3,4,5,9};
-
-
-
-/*Quantize and predict the DC coefficients.
- This is done in a separate step because the prediction of DC coefficients
- occurs in image order, not in the Hilbert-curve order, unlike the rest of
- the encoding process.*/
-static void oc_enc_quant_dc(oc_enc_ctx *_enc){
- oc_fragment_enc_info *efrag;
- oc_fragment *frag;
- int pli;
- frag=_enc->state.frags;
- efrag=_enc->frinfo;
- for(pli=0;pli<3;pli++){
- oc_fragment_plane *fplane;
- unsigned fquant;
- unsigned iquant;
- int pred_last[3];
- int fragx;
- int fragy;
- pred_last[OC_FRAME_GOLD]=0;
- pred_last[OC_FRAME_PREV]=0;
- pred_last[OC_FRAME_SELF]=0;
- fplane=_enc->state.fplanes+pli;
- for(fragy=0;fragy<fplane->nvfrags;fragy++){
- for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++,efrag++){
- int qc_pred;
- int qc;
- if(!frag->coded)continue;
- qc_pred=oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
- /*Fragments outside the displayable region must still be coded in key
- frames.
- To minimize wasted bits, just use the predicted DC value.
- TODO: We might do a better job in the lower-left hand corner by
- propagating over the DC value of the first actually coded fragment,
- but for the moment this is not done.*/
- if(frag->invalid)qc=0;
- else{
- int c;
- int c_abs;
- int qti;
- /*We now center the DC coefficient range around the predicted value
- and perform token bits optimization based on the HVS-determined
- tolerance range.
- For more details, see oc_enc_frag_quant_tokenize().*/
- qti=frag->mbmode!=OC_MODE_INTRA;
- iquant=_enc->state.dequant_tables[qti][pli][_enc->state.qis[0]][0];
- c=efrag->dct_coeffs[0]-qc_pred*iquant;
- c_abs=abs(c);
- if(c_abs<=efrag->tols[0])qc=0;
- else{
- int qc_signed[2];
- int qc_max;
- int qc_min;
- int qc_offs;
- int c_sign;
- int c_min;
- int c_recon;
- int cati;
- fquant=_enc->enquant_tables[qti][pli][_enc->state.qis[0]][0];
- qc_max=(ogg_int32_t)c_abs*fquant+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
- c_sign=c<0;
- c_recon=(qc_max-1)*iquant;
- c_min=OC_MAXI(0,c_abs-efrag->tols[0]);
- for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=iquant;
- if(qc_min<3+OC_NDCT_VAL_CAT2_SIZE)qc=qc_min;
- else{
- qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
- for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
- cati++){
- qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
- }
- qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
- }
- qc_signed[0]=qc;
- qc_signed[1]=-qc;
- qc=qc_signed[c_sign];
- }
- }
- pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc=qc+qc_pred;
- efrag->dct_coeffs[0]=(ogg_int16_t)qc;
- }
- }
- }
-}
-
-/*Quantize and tokenize the given fragment.
- _efrag: The encoder information for the fragment to quantize.
- _fquant: The forward quantization matrix to use.
- _iquant: The inverse quantization matrix to use.*/
-static int oc_enc_frag_quant_tokenize(oc_enc_ctx *_enc,
- oc_fragment_enc_info *_efrag,const ogg_uint16_t _fquant[64],
- const ogg_uint16_t _iquant[64]){
- int zzi;
- int zrun;
- int qc;
- int qc_offs;
- int c_sign;
- int cati;
- int tli;
- /*The DC coefficient is already quantized (it had to be for DC prediction).
- Here we just tokenize it.*/
- if(_efrag->dct_coeffs[0]){
- qc=abs(_efrag->dct_coeffs[0]);
- c_sign=_efrag->dct_coeffs[0]<0;
- switch(qc){
- case 1:{
- _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
- (unsigned char)(OC_ONE_TOKEN+c_sign);
- }break;
- case 2:{
- _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
- (unsigned char)(OC_TWO_TOKEN+c_sign);
- }break;
- default:{
- if(qc-3<OC_NDCT_VAL_CAT2_SIZE){
- _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
- (unsigned char)(OC_DCT_VAL_CAT2+qc-3);
- _enc->extra_bits[0][_enc->nextra_bits[0]++]=(ogg_uint16_t)c_sign;
- }
- else{
- qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
- for(cati=0;qc>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];cati++){
- qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
- }
- _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
- (unsigned char)(OC_DCT_VAL_CAT3+cati);
- _enc->extra_bits[0][_enc->nextra_bits[0]++]=
- (ogg_uint16_t)((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
- }
- }
- }
- zrun=0;
- }
- else zrun=1;
- /*Now we quantize and tokenize each AC coefficient.*/
- for(zzi=1;zzi<64;zzi++){
- int qc_signed[2];
- int qc_max;
- int qc_min;
- int c_sign;
- int c_abs;
- int c_min;
- int c_recon;
- int ci;
- ci=OC_FZIG_ZAG[zzi];
- c_abs=abs(_efrag->dct_coeffs[ci]);
- /*Best case: we can encode this as a zero.*/
- if(c_abs<=_efrag->tols[ci]){
- zrun++;
- _efrag->dct_coeffs[ci]=0;
- }
- else{
- c_sign=_efrag->dct_coeffs[ci]<0;
- /*qc_max is the most accurate quantized value.
- This is the largest possible (absolute) value we will use.*/
- qc_max=(ogg_int32_t)c_abs*_fquant[ci]+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
- /*qc_min is the smallest possible (by absolute value) quantized value
- whose dequantized value is within the HVS-determined tolerance
- range.*/
- /*TODO: qc_min could be computed by a division (we do not want to allow
- the rounding errors that are possible with the mul+shift quantization
- used for qc_max), which would allow qc_max to be calculated only if
- needed below.
- Is this faster?
- Who knows.*/
- c_recon=(qc_max-1)*_iquant[ci];
- c_min=c_abs-_efrag->tols[ci];
- for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=_iquant[ci];
- /*We now proceed to find a token that is as close to qc_max as possible,
- but does not use any more bits than would be required for qc_min.
- The general assumption we make is that encoding a value closer to 0
- always uses fewer bits.
- qc_min can still reach 0 here despite the test above, if the quantizer
- value is larger than the tolerance (which can happen for very small
- tolerances; the quantizer value has a minimum it cannot go below).*/
- if(qc_min==0){
- zrun++;
- _efrag->dct_coeffs[ci]=0;
- }
- else{
- /*If we have an outstanding zero run, code it now.*/
- if(zrun>0){
- /*The zero run tokens appear on the list for the first zero in the
- run.*/
- tli=zzi-zrun;
- /*Second assumption: coding a combined run/value token always uses
- fewer bits than coding them separately.*/
- /*CAT1 run/value tokens: the value is 1.*/
- if(qc_min==1&&zrun<=17){
- if(zrun<=5){
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- (unsigned char)(OC_DCT_RUN_CAT1A+(zrun-1));
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)c_sign;
- }
- else if(zrun<=9){
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_RUN_CAT1B;
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)((c_sign<<2)+zrun-6);
- }
- else{
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_RUN_CAT1C;
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)((c_sign<<3)+zrun-10);
- }
- qc_signed[0]=1;
- qc_signed[1]=-1;
- _efrag->dct_coeffs[ci]=(ogg_int16_t)qc_signed[c_sign];
- zrun=0;
- /*Skip coding the DCT value below.*/
- continue;
- }
- /*CAT2 run/value tokens: the value is 2-3.*/
- else if(qc_min<=3&&zrun<=3){
- if(zrun==1){
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_RUN_CAT2A;
- qc=OC_MINI(3,qc_max);
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)((c_sign<<1)+qc-2);
- }
- else{
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_RUN_CAT2B;
- qc=OC_MINI(3,qc_max);
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)((c_sign<<2)+(qc-2<<1)+zrun-2);
- }
- qc_signed[0]=qc;
- qc_signed[1]=-qc;
- _efrag->dct_coeffs[ci]=(ogg_int16_t)qc_signed[c_sign];
- zrun=0;
- /*Skip coding the DCT value below.*/
- continue;
- }
- /*The run is too long or the quantized value too large: code them
- separately.*/
- else{
- /*This is stupid: non-short ZRL tokens are never used for run
- values less than 9, but codewords are reserved for them,
- wasting bits.
- Yes, yes, this would've meant a non-constant number of extra
- bits for this token, but even so.*/
- if(zrun<=8){
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_SHORT_ZRL_TOKEN;
- }
- else{
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
- OC_DCT_ZRL_TOKEN;
- }
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
- (ogg_uint16_t)(zrun-1);
- zrun=0;
- }
- }
- /*No zero run, or the run and the qc value are being coded
- separately.*/
- switch(qc_min){
- case 1:{
- _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
- (unsigned char)(OC_ONE_TOKEN+c_sign);
- _efrag->dct_coeffs[ci]=(ogg_int16_t)((-c_sign<<1)+1);
- }break;
- case 2:{
- _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
- (unsigned char)(OC_TWO_TOKEN+c_sign);
- _efrag->dct_coeffs[ci]=(ogg_int16_t)((-c_sign<<2)+2);
- }break;
- default:{
- if(qc_min-3<OC_NDCT_VAL_CAT2_SIZE){
- _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
- (unsigned char)(OC_DCT_VAL_CAT2+qc_min-3);
- _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=
- (ogg_uint16_t)c_sign;
- qc_signed[0]=qc_min;
- qc_signed[1]=-qc_min;
- _efrag->dct_coeffs[ci]=(ogg_int16_t)qc_signed[c_sign];
- }
- else{
- qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
- for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
- cati++){
- qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
- }
- /*qc_min can be encoded in this category.
- Since all DCT values in the category use the same number of
- bits, we encode the closest value to qc_max.
- This is either qc_max itself, if it is in the category's
- range, or the largest value in the category.*/
- qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
- qc_signed[0]=qc;
- qc_signed[1]=-qc;
- _efrag->dct_coeffs[ci]=(ogg_int16_t)qc_signed[c_sign];
- _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
- (unsigned char)(OC_DCT_VAL_CAT3+cati);
- _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=(ogg_uint16_t)
- ((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
- }
- }
- }
- }
- }
- }
- /*If there's a trailing zero run, code an EOB token.*/
- if(zrun>0){
- int old_tok;
- int toki;
- int ebi;
- tli=64-zrun;
- toki=_enc->ndct_tokens[tli]-1;
- if(toki>=0)old_tok=_enc->dct_tokens[tli][toki];
- else old_tok=-1;
- /*Try to extend an EOB run.*/
- switch(old_tok){
- case OC_DCT_EOB1_TOKEN:
- case OC_DCT_EOB2_TOKEN:{
- _enc->dct_tokens[tli][toki]++;
- }break;
- case OC_DCT_EOB3_TOKEN:{
- _enc->dct_tokens[tli][toki]++;
- _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=0;
- }break;
- case OC_DCT_REPEAT_RUN0_TOKEN:{
- ebi=_enc->nextra_bits[tli]-1;
- if(_enc->extra_bits[tli][ebi]<3)_enc->extra_bits[tli][ebi]++;
- else{
- _enc->dct_tokens[tli][toki]++;
- _enc->extra_bits[tli][ebi]=0;
- }
- }break;
- case OC_DCT_REPEAT_RUN1_TOKEN:{
- ebi=_enc->nextra_bits[tli]-1;
- if(_enc->extra_bits[tli][ebi]<7)_enc->extra_bits[tli][ebi]++;
- else{
- _enc->dct_tokens[tli][toki]++;
- _enc->extra_bits[tli][ebi]=0;
- }
- }break;
- case OC_DCT_REPEAT_RUN2_TOKEN:{
- ebi=_enc->nextra_bits[tli]-1;
- if(_enc->extra_bits[tli][ebi]<15)_enc->extra_bits[tli][ebi]++;
- else{
- _enc->dct_tokens[tli][toki]++;
- /*Again stupid: we could encode runs up to 4127, but inexplicably
- they don't subtract the bottom of the range here, so we can only
- go to 4095 (unless we want to change the spec to deal with
- wrap-around).*/
- _enc->extra_bits[tli][ebi]=32;
- }
- }break;
- case OC_DCT_REPEAT_RUN3_TOKEN:{
- ebi=_enc->nextra_bits[tli]-1;
- if(_enc->extra_bits[tli][ebi]<4095){
- _enc->extra_bits[tli][ebi]++;
- break;
- }
- /*else fall through.*/
- }
- /*Start a new EOB run.*/
- default:{
- _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=OC_DCT_EOB1_TOKEN;
- }
- }
- }
- /*Return the number of coefficients before the final zero run.*/
- return 64-zrun;
-}
-
-static void oc_enc_residual_tokenize(oc_enc_ctx *_enc){
- int *coded_fragi;
- int *coded_fragi_end;
- int pli;
+/*Merge the final EOB run of each coefficient list with the start of the next,
+ if possible.
+ This assumes that dct_token_offs[0][zzi] is 0 for each zzi, and will
+ increase it as appropriate if an EOB run is merged with that of a previous
+ token index.*/
+void oc_enc_merge_eob_runs(oc_enc_ctx *_enc){
int zzi;
- /*Clear any existing DCT tokens.*/
- for(zzi=0;zzi<64;zzi++){
- _enc->ndct_tokens[zzi]=_enc->nextra_bits[zzi]=0;
- _enc->extra_bits_offs[zzi]=0;
- }
- coded_fragi_end=coded_fragi=_enc->state.coded_fragis;
- for(pli=0;pli<3;pli++){
- memcpy(_enc->dct_token_offs[pli],_enc->ndct_tokens,
- sizeof(_enc->dct_token_offs[pli]));
- coded_fragi_end+=_enc->state.ncoded_fragis[pli];
- for(;coded_fragi<coded_fragi_end;coded_fragi++){
- oc_quant_table *iquants;
- oc_fragment *frag;
- oc_fragment_enc_info *efrag;
- int fragi;
- int qti;
- int nnzc;
- fragi=*coded_fragi;
- frag=_enc->state.frags+fragi;
- efrag=_enc->frinfo+fragi;
- qti=frag->mbmode!=OC_MODE_INTRA;
- iquants=_enc->state.dequant_tables[qti][pli];
- nnzc=oc_enc_frag_quant_tokenize(_enc,efrag,
- _enc->enquant_tables[qti][pli][frag->qi],iquants[frag->qi]);
- /*While we're here and things are in cache, reconstruct the quantized
- fragment.*/
- oc_state_frag_recon(&_enc->state,frag,pli,efrag->dct_coeffs,nnzc,nnzc,
- iquants[_enc->state.qis[0]][0],iquants[frag->qi]);
- }
- }
- /*Merge the final EOB run of one coefficient list with the start of the
- next, if possible.*/
for(zzi=1;zzi<64;zzi++){
static const int OC_EOB_RANGE[OC_NDCT_EOB_TOKEN_MAX]={1,1,1,4,8,16,4096};
static const int OC_EOB_OFFS[OC_NDCT_EOB_TOKEN_MAX]={1,2,3,4,8,16,0};
@@ -1617,33 +726,455 @@
#endif
}
-/*Encodes the current frame as a key frame.
- The result is stored in the opb field, and the packet state is updated to
- indicate a new packet is ready.
- Return: 0 on success, or a negative value on error.*/
-static int oc_enc_keyframe(oc_enc_ctx *_enc){
- _enc->state.frame_type=OC_INTRA_FRAME;
- oc_enc_quant_sel_quality(_enc,1);
- oc_enc_mark_all_intra(_enc);
- oc_enc_quant_dc(_enc);
- oc_enc_residual_tokenize(_enc);
- oggpackB_reset(&_enc->opb);
- oc_enc_frame_header_pack(_enc);
- oc_enc_block_qis_pack(_enc);
- oc_enc_residual_tokens_pack(_enc);
+
+static void oc_enc_mb_modes_pack(oc_enc_ctx *_enc){
+ const theora_huff_code *codes;
+ const int *mode_ranks;
+ int *coded_mbi;
+ int *coded_mbi_end;
+ int scheme;
+ scheme=_enc->mode_scheme_chooser.scheme_list[0];
+ oggpackB_write(&_enc->opb,scheme,3);
+ if(scheme==0){
+ int ranks[8];
+ int mi;
+ /*The numbers associated with each mode in the stream are slightly
+ different than what we use in the source.
+ The lookup here converts between the two.*/
+ for(mi=0;mi<OC_NMODES;mi++){
+ ranks[OC_MODE_SCHEMES[6][mi]]=
+ _enc->mode_scheme_chooser.scheme0_ranks[mi];
+ }
+ for(mi=0;mi<OC_NMODES;mi++)oggpackB_write(&_enc->opb,ranks[mi],3);
+ }
+ codes=_enc->mode_scheme_chooser.mode_codes[scheme];
+ mode_ranks=_enc->mode_scheme_chooser.mode_ranks[scheme];
+ coded_mbi=_enc->state.coded_mbis;
+ coded_mbi_end=coded_mbi+_enc->state.ncoded_mbis;
+ for(;coded_mbi<coded_mbi_end;coded_mbi++){
+ const theora_huff_code *code;
+ oc_mb *mb;
+ mb=_enc->state.mbs+*coded_mbi;
+ code=codes+mode_ranks[mb->mode];
+ oggpackB_write(&_enc->opb,code->pattern,code->nbits);
+ }
+}
+
+static void oc_enc_mv_pack(oc_enc_ctx *_enc,int _dx,int _dy){
+ const theora_huff_code *code;
+ code=OC_MV_CODES[_enc->mv_scheme]+_dx+31;
+ oggpackB_write(&_enc->opb,code->pattern,code->nbits);
+ code=OC_MV_CODES[_enc->mv_scheme]+_dy+31;
+ oggpackB_write(&_enc->opb,code->pattern,code->nbits);
+}
+
+static void oc_enc_mvs_pack(oc_enc_ctx *_enc){
+ int *coded_mbi;
+ int *coded_mbi_end;
+ oggpackB_write(&_enc->opb,_enc->mv_scheme,1);
+ coded_mbi=_enc->state.coded_mbis;
+ coded_mbi_end=coded_mbi+_enc->state.ncoded_mbis;
+ for(;coded_mbi<coded_mbi_end;coded_mbi++){
+ oc_mb *mb;
+ oc_mb_enc_info *mbinfo;
+ int mbi;
+ mbi=*coded_mbi;
+ mb=_enc->state.mbs+mbi;
+ switch(mb->mode){
+ case OC_MODE_INTER_MV:
+ case OC_MODE_GOLDEN_MV:{
+ int which_frame;
+ which_frame=OC_FRAME_FOR_MODE[mb->mode];
+ mbinfo=_enc->mbinfo+mbi;
+ oc_enc_mv_pack(_enc,mbinfo->mvs[0][which_frame][0],
+ mbinfo->mvs[0][which_frame][1]);
+ }break;
+ case OC_MODE_INTER_MV_FOUR:{
+ int bi;
+ mbinfo=_enc->mbinfo+mbi;
+ for(bi=0;bi<4;bi++){
+ int fragi;
+ fragi=mb->map[0][bi];
+ if(fragi>=0&&_enc->state.frags[fragi].coded){
+ oc_enc_mv_pack(_enc,mbinfo->bmvs[bi][0],mbinfo->bmvs[bi][1]);
+ }
+ }
+ }break;
+ }
+ }
+}
+
+static void oc_enc_enable_default_mode(oc_enc_ctx *_enc){
+ /*TODO: Right now we always use VBR mode.
+ When a CBR mode is available, we should use that by default if the user
+ specifies a bitrate, but not a quality, in the theora_info struct.*/
+ if(_enc->vbr==NULL)_enc->vbr=oc_enc_vbr_alloc(_enc);
+ oc_enc_vbr_enable(_enc->vbr,NULL);
+}
+
+/*A pipeline stage for copying uncoded fragments.*/
+
+static int oc_copy_pipe_start(oc_enc_pipe_stage *_stage){
+ int pli;
+ for(pli=0;pli<3;pli++){
+ _stage->y_procd[pli]=0;
+ _stage->enc->uncoded_fragii[pli]=0;
+ }
+ return _stage->next!=NULL?(*_stage->next->pipe_start)(_stage->next):0;
+}
+
+static int oc_copy_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+ int *uncoded_fragis;
+ oc_enc_ctx *enc;
+ int pli;
+ enc=_stage->enc;
+ uncoded_fragis=enc->state.uncoded_fragis;
+ for(pli=0;pli<3;pli++){
+ int y_avail;
+ y_avail=_y_avail[pli];
+ /*Process in units of super block rows, with the possible exception of the
+ last, partial super block row.*/
+ if(y_avail<enc->state.input[pli].height)y_avail&=~31;
+ if(y_avail>_stage->y_procd[pli]){
+ if(enc->uncoded_fragii[pli]<enc->state.nuncoded_fragis[pli]){
+ oc_fragment_plane *fplane;
+ int fragi_end;
+ int fragii;
+ fplane=enc->state.fplanes+pli;
+ fragi_end=(y_avail>>3)*fplane->nhfrags+fplane->froffset;
+ /*Count the uncoded fragments that belong in these super block rows.*/
+ for(fragii=enc->uncoded_fragii[pli];
+ fragii<enc->state.nuncoded_fragis[pli]&&
+ *(uncoded_fragis-fragii)<fragi_end;fragii++);
+ /*And copy them.*/
+ oc_state_frag_copy(&enc->state,uncoded_fragis-fragii,
+ fragii-enc->uncoded_fragii[pli],OC_FRAME_SELF,OC_FRAME_PREV,pli);
+ enc->uncoded_fragii[pli]=fragii;
+ }
+ _stage->y_procd[pli]=y_avail;
+ if(_stage->next!=NULL){
+ int ret;
+ ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ if(ret<0)return ret;
+ }
+ }
+ uncoded_fragis-=enc->state.nuncoded_fragis[pli];
+ }
+ return 0;
+}
+
+static int oc_copy_pipe_end(oc_enc_pipe_stage *_stage){
+ return _stage->next!=NULL?(*_stage->next->pipe_end)(_stage->next):0;
+}
+
+/*Initialize the uncoded fragment copying stage of the pipeline.
+ _enc: The encoding context.*/
+static void oc_copy_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+ _stage->enc=_enc;
+ _stage->next=NULL;
+ _stage->pipe_start=oc_copy_pipe_start;
+ _stage->pipe_proc=oc_copy_pipe_process;
+ _stage->pipe_end=oc_copy_pipe_end;
+}
+
+/*A pipeline stage for applying the loop filter.*/
+
+static int oc_loop_pipe_start(oc_enc_pipe_stage *_stage){
+ oc_enc_ctx *enc;
+ int pli;
+ enc=_stage->enc;
+ for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+ enc->loop_filter_enabled=enc->ncoded_frags>0&&
+ !oc_state_loop_filter_init(&enc->state,enc->bounding_values+256);
+ return _stage->next!=NULL?(*_stage->next->pipe_start)(_stage->next):0;
+}
+
+static int oc_loop_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+ oc_enc_ctx *enc;
+ int pli;
+ enc=_stage->enc;
+ if(enc->loop_filter_enabled){
+ int refi;
+ refi=enc->state.ref_frame_idx[OC_FRAME_SELF];
+ for(pli=0;pli<3;pli++){
+ int delay;
+ int fragy0;
+ int fragy_end;
+ fragy0=_stage->y_procd[pli]+1>>3;
+ /*Add a 2 pixel delay for the vertical filter, except in the last row.*/
+ delay=(_y_avail[pli]<enc->state.ref_frame_bufs[refi][pli].height);
+ fragy_end=_y_avail[pli]-(delay<<1)>>3;
+ if(fragy_end>fragy0){
+ oc_state_loop_filter_frag_rows(&enc->state,enc->bounding_values+256,
+ refi,pli,fragy0,fragy_end);
+ /*We also add a 1 pixel delay to the next stage, since the vertical
+ filter for the next fragment row can still change the last row of
+ pixels from this fragment row.*/
+ _stage->y_procd[pli]=(fragy_end<<3)-delay;
+ if(_stage->next!=NULL){
+ int ret;
+ ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ if(ret<0)return ret;
+ }
+ }
+ }
+ }
+ else{
+ for(pli=0;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
+ if(_stage->next!=NULL){
+ return (*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ }
+ }
+ return 0;
+}
+
+static int oc_loop_pipe_end(oc_enc_pipe_stage *_stage){
+ return _stage->next!=NULL?(*_stage->next->pipe_end)(_stage->next):0;
+}
+
+/*Initialize the loop filter stage of the pipeline.
+ _enc: The encoding context.*/
+static void oc_loop_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+ _stage->enc=_enc;
+ _stage->next=NULL;
+ _stage->pipe_start=oc_loop_pipe_start;
+ _stage->pipe_proc=oc_loop_pipe_process;
+ _stage->pipe_end=oc_loop_pipe_end;
+}
+
+/*A pipeline stage for filling in the image border.*/
+
+static int oc_fill_pipe_start(oc_enc_pipe_stage *_stage){
+ int pli;
+ for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+ return _stage->next!=NULL?(*_stage->next->pipe_start)(_stage->next):0;
+}
+
+static int oc_fill_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+ int pli;
+ if(_stage->enc->ncoded_frags>0){
+ oc_theora_state *state;
+ int refi;
+ state=&_stage->enc->state;
+ refi=state->ref_frame_idx[OC_FRAME_SELF];
+ for(pli=0;pli<3;pli++){
+ if(_stage->y_procd[pli]<_y_avail[pli]){
+ oc_state_borders_fill_rows(state,refi,pli,_stage->y_procd[pli],
+ _y_avail[pli]);
+ _stage->y_procd[pli]=_y_avail[pli];
+ if(_stage->next!=NULL){
+ int ret;
+ ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ if(ret<0)return ret;
+ }
+ }
+ }
+ }
+ else{
+ for(pli=0;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
+ if(_stage->next!=NULL){
+ return (*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ }
+ }
+ return 0;
+}
+
+static int oc_fill_pipe_end(oc_enc_pipe_stage *_stage){
+ oc_theora_state *state;
+ int refi;
+ int pli;
+ state=&_stage->enc->state;
+ refi=state->ref_frame_idx[OC_FRAME_SELF];
+ for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(state,refi,pli);
+ return _stage->next!=NULL?(*_stage->next->pipe_end)(_stage->next):0;
+}
+
+/*Initialize the border filling stage of the pipeline.
+ _enc: The encoding context.*/
+static void oc_fill_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+ _stage->enc=_enc;
+ _stage->next=NULL;
+ _stage->pipe_start=oc_fill_pipe_start;
+ _stage->pipe_proc=oc_fill_pipe_process;
+ _stage->pipe_end=oc_fill_pipe_end;
+}
+
+/*A pipeline stage for storing the encoded frame contents in a packet.*/
+
+static int oc_pack_pipe_start(oc_enc_pipe_stage *_stage){
+ int pli;
+ for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+ return 0;
+}
+
+static int oc_pack_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+ int pli;
+ for(pli=0;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
+ return 0;
+}
+
+static int oc_pack_pipe_end(oc_enc_pipe_stage *_stage){
+ oc_enc_ctx *enc;
+ int ret;
+ if(_stage->next!=NULL){
+ ret=(*_stage->next->pipe_start)(_stage->next);
+ if(ret<0)return ret;
+ }
+ enc=_stage->enc;
+ oggpackB_reset(&enc->opb);
+ /*Only proceed if we have some coded blocks.
+ No coded blocks -> dropped frame -> 0 byte packet.*/
+ if(enc->ncoded_frags>0){
+ oc_enc_frame_header_pack(enc);
+ if(enc->state.frame_type==OC_INTER_FRAME){
+ oggpackB_writecopy(&enc->opb,
+ oggpackB_get_buffer(&enc->opb_coded_flags),
+ oggpackB_bits(&enc->opb_coded_flags));
+ oc_enc_mb_modes_pack(enc);
+ oc_enc_mvs_pack(enc);
+ }
+ oc_enc_block_qis_pack(enc);
+ /*Pack the quantized DCT coefficients.*/
+ oc_enc_residual_tokens_pack(enc);
+ }
/*Success: Mark the packet as ready to be flushed.*/
- _enc->packet_state=OC_PACKET_READY;
+ enc->packet_state=OC_PACKET_READY;
+ if(_stage->next!=NULL){
+ ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ if(ret<0)return ret;
+ return (*_stage->next->pipe_end)(_stage->next);
+ }
return 0;
}
+/*Initialize the packet assembly stage of the pipeline.
+ _enc: The encoding context.*/
+static void oc_pack_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+ _stage->enc=_enc;
+ _stage->next=NULL;
+ _stage->pipe_start=oc_pack_pipe_start;
+ _stage->pipe_proc=oc_pack_pipe_process;
+ _stage->pipe_end=oc_pack_pipe_end;
+}
+static int oc_enc_init(oc_enc_ctx *_enc,const theora_info *_info){
+ int ret;
+ /*Initialize the shared encoder/decoder state.*/
+ ret=oc_state_init(&_enc->state,_info);
+ if(ret<0)return ret;
+ _enc->block_coded_flags=_ogg_calloc(_enc->state.nfrags,
+ sizeof(_enc->block_coded_flags[0]));
+ /*Initialize our packet buffers.*/
+ oggpackB_writeinit(&_enc->opb);
+ oggpackB_writeinit(&_enc->opb_coded_flags);
+ /*Allocate and initialize storage for encoder-specific fragment and macro
+ block storage, as well as DCT token storage.*/
+ _enc->frinfo=_ogg_calloc(_enc->state.nfrags,
+ sizeof(_enc->frinfo[0]));
+ _enc->mbinfo=_ogg_calloc(_enc->state.nmbs,sizeof(_enc->mbinfo[0]));
+ _enc->dct_tokens=(unsigned char **)oc_malloc_2d(64,
+ _enc->state.nfrags,sizeof(_enc->dct_tokens[0][0]));
+ _enc->extra_bits=(ogg_uint16_t **)oc_malloc_2d(64,
+ _enc->state.nfrags,sizeof(_enc->extra_bits[0][0]));
+ oc_enc_init_mbinfo(_enc);
+ /*Do one-time mode scheme chooser initialization.*/
+ oc_mode_scheme_chooser_init(&_enc->mode_scheme_chooser);
+ /*Set the maximum distance between key frames.*/
+ _enc->keyframe_frequency_force=1<<_enc->state.info.keyframe_granule_shift;
+ /*Initialize the motion compensation, high-level importance map, and
+ low-level psychovisual model plug-ins.*/
+ _enc->mcenc=oc_mcenc_alloc(_enc);
+ /*Reset the packet-out state machine.*/
+ _enc->packet_state=OC_PACKET_INFO_HDR;
+ /*Mark us as not VP3-compatible.*/
+ _enc->vp3_compatible=0;
+ /*Set the Huffman codes and quantization parameters to the defaults.*/
+ memcpy(_enc->huff_codes,OC_VP31_HUFF_CODES,sizeof(_enc->huff_codes));
+ oc_enc_set_quant_params(_enc,NULL);
+ /*Initialize the static pipeline stages.*/
+ oc_fdct_pipe_init(&_enc->fdct_pipe,_enc);
+ oc_copy_pipe_init(&_enc->copy_pipe,_enc);
+ oc_loop_pipe_init(&_enc->loop_pipe,_enc);
+ _enc->copy_pipe.next=&_enc->loop_pipe;
+ oc_fill_pipe_init(&_enc->fill_pipe,_enc);
+ _enc->loop_pipe.next=&_enc->fill_pipe;
+ oc_pack_pipe_init(&_enc->pack_pipe,_enc);
+ /*Delay initialization of the encoding pipeline until the application sets
+ an encoding mode or the first frame is submitted.*/
+ _enc->pipe=NULL;
+ _enc->vbr=NULL;
+ return 0;
+}
+
+static void oc_enc_clear(oc_enc_ctx *_enc){
+ oc_enc_vbr_free(_enc->vbr);
+ oc_mcenc_free(_enc->mcenc);
+ oc_free_2d(_enc->extra_bits);
+ oc_free_2d(_enc->dct_tokens);
+ _ogg_free(_enc->mbinfo);
+ _ogg_free(_enc->frinfo);
+ _ogg_free(_enc->block_coded_flags);
+ oc_state_clear(&_enc->state);
+}
+
+
+
+/*A default implementation of set_speed, to use when the encoding mode is not
+ configurable.
+ It does nothing.
+ _speed: The encoding speed to use.*/
+void oc_enc_set_speed_null(oc_enc_ctx *_enc,int _speed){}
+
+/*Computes the SAD value of a fragment in the input image with respect to its
+ motion compensated predictor.
+ _frag: The fragment to find the SAD of.
+ _dx: The X component of the motion vector.
+ _dy: The Y component of the motion vector.
+ _pli: The color plane the fragment belongs to.
+ _frame: The reference frame to predict from.*/
+int oc_enc_frag_sad(oc_enc_ctx *_enc,oc_fragment *_frag,int _dx,
+ int _dy,int _pli,int _frame){
+ int cur_ystride;
+ int ref_ystride;
+ int ref_framei;
+ int mvoffset0;
+ int mvoffset1;
+ cur_ystride=_enc->state.input[_pli].ystride;
+ ref_framei=_enc->state.ref_frame_idx[_frame];
+ ref_ystride=_enc->state.ref_frame_bufs[ref_framei][_pli].ystride;
+ if(oc_state_get_mv_offsets(&_enc->state,&mvoffset0,&mvoffset1,_dx,_dy,
+ ref_ystride,_pli)>1){
+ if(_frag->border==NULL){
+ return oc_sad8_halfpel(_frag->buffer[OC_FRAME_IO],cur_ystride,
+ _frag->buffer[ref_framei]+mvoffset0,
+ _frag->buffer[ref_framei]+mvoffset1,ref_ystride);
+ }
+ else{
+ return oc_sad8_halfpel_border(_frag->buffer[OC_FRAME_IO],cur_ystride,
+ _frag->buffer[ref_framei]+mvoffset0,
+ _frag->buffer[ref_framei]+mvoffset1,ref_ystride,_frag->border->mask);
+ }
+ }
+ else{
+ if(_frag->border==NULL){
+ return oc_sad8_fullpel(_frag->buffer[OC_FRAME_IO],cur_ystride,
+ _frag->buffer[ref_framei]+mvoffset0,ref_ystride);
+ }
+ else{
+ return oc_sad8_fullpel_border(_frag->buffer[OC_FRAME_IO],
+ cur_ystride,_frag->buffer[ref_framei]+mvoffset0,ref_ystride,
+ _frag->border->mask);
+ }
+ }
+}
+
/*Writes the bit flags for whether or not each super block is partially coded
or not.
These flags are run-length encoded, with the flag value alternating between
each run.
Return: The number of bits written.*/
-static int oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc){
+int oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb){
oc_sb *sb;
oc_sb *sb_end;
unsigned flag;
@@ -1651,7 +1182,7 @@
int ret;
/*Write the list of partially coded super block flags.*/
flag=_enc->state.sbs[0].coded_partially;
- oggpackB_write(&_enc->opb_coded_flags,flag,1);
+ oggpackB_write(_opb,flag,1);
ret=1;
sb=_enc->state.sbs;
sb_end=sb+_enc->state.nsbs;
@@ -1668,18 +1199,18 @@
invalid code for longer runs.*/
/*First, encode runs until we have 4129 or fewer sbs left.*/
while(run_count>4129){
- ret+=oc_sb_run_pack(&_enc->opb_coded_flags,4129);
+ ret+=oc_sb_run_pack(_opb,4129);
run_count-=4129;
- oggpackB_write(&_enc->opb_coded_flags,flag,1);
+ oggpackB_write(_opb,flag,1);
ret++;
}
/*Encode the last run.*/
- ret+=oc_sb_run_pack(&_enc->opb_coded_flags,run_count);
+ ret+=oc_sb_run_pack(_opb,run_count);
flag=!flag;
/*If there are more sbs to come, and we had a run of 4129 exactly,
encode the flipped bit.*/
if(run_count==4129&&sb<sb_end){
- oggpackB_write(&_enc->opb_coded_flags,flag,1);
+ oggpackB_write(_opb,flag,1);
ret++;
}
}
@@ -1691,7 +1222,7 @@
These flags are run-length encoded, with the flag value altenating between
each run.
Return: The number of bits written.*/
-static int oc_enc_coded_sb_flags_pack(oc_enc_ctx *_enc){
+int oc_enc_coded_sb_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb){
oc_sb *sb;
oc_sb *sb_end;
unsigned flag;
@@ -1706,7 +1237,7 @@
if(!sb->coded_partially)break;
}
flag=sb->coded_fully;
- oggpackB_write(&_enc->opb_coded_flags,flag,1);
+ oggpackB_write(_opb,flag,1);
ret=1;
while(sb<sb_end){
for(run_count=0;sb<sb_end;sb++){
@@ -1722,16 +1253,16 @@
invalid code for longer runs.*/
/*First, encode runs until we have 4129 or fewer sbs left.*/
while(run_count>4129){
- ret+=oc_sb_run_pack(&_enc->opb_coded_flags,4129);
+ ret+=oc_sb_run_pack(_opb,4129);
run_count-=4129;
- oggpackB_write(&_enc->opb_coded_flags,flag,1);
+ oggpackB_write(_opb,flag,1);
ret++;
}
/*Encode the last run.*/
- ret+=oc_sb_run_pack(&_enc->opb_coded_flags,run_count);
+ ret+=oc_sb_run_pack(_opb,run_count);
flag=!flag;
if(run_count==4129&&sb<sb_end){
- oggpackB_write(&_enc->opb_coded_flags,flag,1);
+ oggpackB_write(_opb,flag,1);
ret++;
}
}
@@ -1743,14 +1274,14 @@
These flags are run-length encoded, with the flag value alternating between
each run.
Return: The number of bits written.*/
-static int oc_enc_coded_block_flags_pack(oc_enc_ctx *_enc){
+int oc_enc_coded_block_flags_pack(oc_enc_ctx *_enc,oggpack_buffer *_opb){
int flag;
int run_count;
int bli;
int ret;
if(_enc->nblock_coded_flags<=0)return 0;
flag=_enc->block_coded_flags[0];
- oggpackB_write(&_enc->opb_coded_flags,flag,1);
+ oggpackB_write(_opb,flag,1);
ret=1;
for(bli=0;bli<_enc->nblock_coded_flags;){
for(run_count=0;bli<_enc->nblock_coded_flags;bli++){
@@ -1763,580 +1294,15 @@
or its complement).
This avoids the nastiness of the VLC not letting us encode runs long
enough like above.*/
- ret+=oc_block_run_pack(&_enc->opb_coded_flags,run_count);
+ ret+=oc_block_run_pack(_opb,run_count);
flag=!flag;
}
return ret;
}
-/*Marks each fragment as coded or not, based on the coefficient-level
- thresholds computed in the psychovisual stage.
- The MB mode of the fragments are not set, as they will be computed in
- oc_enc_choose_mbmodes().
- This also builds up the coded fragment and uncoded fragment lists.
- The coded MB list is not built up.
- That is done during mode decision.*/
-static void oc_enc_mark_coded(oc_enc_ctx *_enc){
- oc_sb *sb;
- oc_sb *sb_end;
- int pli;
- int bli;
- int ncoded_fragis;
- int prev_ncoded_fragis;
- int nuncoded_fragis;
- int prev_nuncoded_fragis;
- _enc->nblock_coded_flags=bli=0;
- prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0;
- sb=sb_end=_enc->state.sbs;
- for(pli=0;pli<3;pli++){
- const oc_fragment_plane *fplane;
- int ystride;
- int prev_refi;
- fplane=_enc->state.fplanes+pli;
- sb_end+=fplane->nsbs;
- prev_refi=_enc->state.ref_frame_idx[OC_FRAME_PREV];
- ystride=_enc->state.ref_frame_bufs[prev_refi][pli].ystride;
- for(;sb<sb_end;sb++){
- int quadi;
- sb->coded_fully=1;
- sb->coded_partially=0;
- for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
- int bi;
- for(bi=0;bi<4;bi++){
- int fragi;
- fragi=sb->map[quadi][bi];
- if(fragi>=0){
- oc_fragment *frag;
- int flag;
- frag=_enc->state.frags+fragi;
- if(frag->invalid){
- frag->coded=0;
- *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
- }
- else{
- oc_fragment_enc_info *efrag;
- ogg_int16_t dct_buf[64];
- int ci;
- /*Check to see if the fragment can be skipped.
- It is assumed that a skipped fragment always takes fewer bits
- than a coded fragment, though this may not necessarily be true.
- A single skipped fragment could take up to 34 bits to encode
- its location in the RLE scheme Theora uses */
- oc_enc_frag_intra_fdct(_enc,frag,dct_buf,ystride,prev_refi);
- efrag=_enc->frinfo+fragi;
- /*The comparison against OC_DC_QUANT_MIN and OC_AC_QUANT_MIN
- ensures we mark a fragment as skipped if it would be quantized
- to all zeros in OC_MODE_INTER_NOMV.
- These minimum quantizers represent the maximum quality the
- format is capable of, and can be larger than our tolerances.
- The minimum for INTER modes is twice the minimum for INTRA
- modes, so technically if the tolerances are below this
- threshold, we might be able to do a better job representing
- this fragment by coding it in INTRA mode.
- But the number of extra bits required to do that would be
- ridiculous, so we give up our devotion to minimum quality just
- this once.
-
- Note: OC_DC_QUANT_MIN[0] should actually be
- OC_DC_QUANT_MIN[1]>>1, but in this case those are
- equivalent.*/
- ci=0;
- if((unsigned)abs(dct_buf[0]-efrag->dct_coeffs[0])<=
- OC_MAXI(efrag->tols[0],OC_DC_QUANT_MIN[0])){
- for(ci++;ci<64;ci++){
- if((unsigned)abs(dct_buf[ci]-efrag->dct_coeffs[ci])>
- OC_MAXI(efrag->tols[ci],OC_AC_QUANT_MIN[0])){
- break;
- }
- }
- }
- if(ci>=64){
- frag->coded=0;
- *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
- }
- else{
- frag->coded=1;
- _enc->state.coded_fragis[ncoded_fragis++]=fragi;
- }
- }
- flag=frag->coded;
- sb->coded_fully&=flag;
- sb->coded_partially|=flag;
- _enc->block_coded_flags[bli++]=(char)flag;
- }
- }
- }
- /*If this is a partially coded super block, keep the entries just added
- to the code block flag list.*/
- if(!sb->coded_fully&&sb->coded_partially){
- _enc->nblock_coded_flags=bli;
- }
- /*Otherwise, discard these entries from the list, as they are
- implicit.*/
- else{
- sb->coded_partially=0;
- bli=_enc->nblock_coded_flags;
- }
- }
- _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
- prev_ncoded_fragis=ncoded_fragis;
- _enc->state.nuncoded_fragis[pli]=nuncoded_fragis-prev_nuncoded_fragis;
- prev_nuncoded_fragis=nuncoded_fragis;
- }
-}
-
-/*Selects an appropriate coding mode for each macro block.
- A mode is chosen for the macro blocks with at least one coded fragment.
- A bit cost estimate for coding the frame with the selected modes is made,
- and a similar estimate is made for coding the frame as a key frame.
- These estimates are used to select the optimal frame type.
- Return: The frame type to encode with: OC_INTER_FRAME or OC_INTRA_FRAME.*/
-static int oc_enc_choose_mbmodes(oc_enc_ctx *_enc){
- oc_set_chroma_mvs_func set_chroma_mvs;
- oc_fragment_enc_info *efrag;
- oc_fragment *frag;
- oc_mb *mb;
- oc_mb_enc_info *mbinfo;
- char last_mv[2][2];
- int *uncoded_fragi;
- int *uncoded_fragi_end;
- int best_qii;
- int qii;
- int qi;
- int pli;
- int mbi;
- int fragi;
- int ci;
- int nmbs;
- int mvbitsa;
- int mvbitsb;
- int intra_bits;
- int inter_bits;
- nmbs=_enc->state.nmbs;
- set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
- oc_mode_scheme_chooser_reset(&_enc->mode_scheme_chooser);
- memset(last_mv,0,sizeof(last_mv));
- mbinfo=_enc->mbinfo;
- mvbitsa=mvbitsb=0;
- inter_bits=2+7*_enc->state.nqis-(_enc->state.nqis==3);
- intra_bits=inter_bits+3;
- _enc->state.ncoded_mbis=0;
- for(mbi=0;mbi<nmbs;mbi++){
- mb=_enc->state.mbs+mbi;
- if(mb->mode!=OC_MODE_INVALID){
- oc_fragment_enc_info *efrag;
- char bmvs[2][4][2];
- char mbmv[2];
- int err[OC_NMODES][12];
- int bits[OC_NMODES];
- int coded[13];
- int frag_qii[12][2][2];
- int ncoded;
- int ncoded_luma;
- int mapii;
- int mapi;
- int modei;
- int codedi;
- int mbintrabits;
- int mbpmvbitsa;
- int mbgmvbitsa;
- int mb4mvbitsa;
- int mb4mvbitsb;
- int fti;
- int qti;
- int bi;
- mbinfo=_enc->mbinfo+mbi;
- /*Build up a list of coded fragments.*/
- ncoded=0;
- for(mapii=0;mapii<OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];mapii++){
- mapi=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt][mapii];
- fragi=mb->map[mapi>>2][mapi&3];
- if(fragi>=0&&_enc->state.frags[fragi].coded)coded[ncoded++]=mapi;
- }
- /*If we don't find any, mark this MB not coded and move on.*/
- if(ncoded<=0){
- mb->mode=OC_MODE_NOT_CODED;
- /*Don't bother to do a MV search against the golden frame.
- Just re-use the last vector, which should match well since the
- contents of the MB haven't changed much.*/
- mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[1][OC_FRAME_GOLD][0];
- mbinfo->mvs[0][OC_FRAME_GOLD][1]=mbinfo->mvs[1][OC_FRAME_GOLD][1];
- continue;
- }
- /*Count the number of coded blocks that are luma blocks, and replace the
- block MVs for not-coded blocks with (0,0).*/
- memcpy(bmvs[0],mbinfo->bmvs,sizeof(bmvs[0]));
- /*Mark the end of the list so we don't go past it below.*/
- coded[ncoded]=-1;
- for(mapi=ncoded_luma=0;mapi<4;mapi++){
- if(coded[ncoded_luma]==mapi)ncoded_luma++;
- else bmvs[0][mapi][0]=bmvs[0][mapi][1]=0;
- }
- /*Select a qi value for each coded fragment for each frame type and
- quantizer type.*/
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- efrag=_enc->frinfo+mb->map[mapi>>2][mapi&3];
- for(fti=0;fti<2;fti++)for(qti=0;qti<=fti;qti++){
- best_qii=0;
- for(qii=1;qii<_enc->nqis[fti];qii++){
- if(efrag->qi_min[qti]<=_enc->qis[fti][qii]&&
- (_enc->qis[fti][qii]<_enc->qis[fti][best_qii]||
- _enc->qis[fti][best_qii]<efrag->qi_min[qti])){
- best_qii=qii;
- }
- }
- frag_qii[codedi][fti][qti]=best_qii;
- }
- }
- /*Special case: If no luma blocks are coded, but some chroma blocks are,
- then the macro block defaults to OC_MODE_INTER_NOMV, and no mode need
- be explicitly coded for it.*/
- if(ncoded_luma<=0){
- mb->mode=OC_MODE_NOT_CODED;
- /*Don't bother to do a MV search against the golden frame.*/
- mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[0][OC_FRAME_GOLD][1]=0;
- /*We do collect bitrate stats for frame type decision.*/
- mbintrabits=bits[OC_MODE_INTER_NOMV]=0;
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- pli=mapi>>2;
- fragi=mb->map[pli][mapi&3];
- frag=_enc->state.frags+fragi;
- efrag=_enc->frinfo+fragi;
- /*Set the MB mode and MV in the fragment.*/
- frag->mbmode=OC_MODE_INTER_NOMV;
- frag->mv[0]=frag->mv[1]=0;
- /*Calculate the bitrate estimates.*/
- err[OC_MODE_INTRA][mapi]=0;
- for(ci=1;ci<64;ci++){
- err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
- }
- err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
- OC_FRAME_PREV);
- qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
- mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
- OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
- qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
- bits[OC_MODE_INTER_NOMV]+=OC_RES_BITRATES[qi][pli][
- OC_MODE_INTER_NOMV][OC_MINI(err[OC_MODE_INTER_NOMV][mapi]>>6,15)];
- /*Also mark this fragment with the selected INTER qi.
- It will be reset if we eventually code this as an INTRA frame.*/
-#if defined(OC_BITRATE_STATS)
- efrag->eerror=err[OC_MODE_INTER_NOMV][mapi];
-#endif
- efrag->qii=(unsigned char)frag_qii[codedi][OC_INTER_FRAME][1];
- frag->qi=qi;
- }
- intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
- inter_bits+=bits[OC_MODE_INTER_NOMV]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
- continue;
- }
- /*Otherwise, add this to the coded MB list.*/
- _enc->state.coded_mbis[_enc->state.ncoded_mbis++]=mbi;
- /*Compute the chroma MVs for the 4MV mode.*/
- (*set_chroma_mvs)(bmvs[1],bmvs[0]);
- /*Do a MV search against the golden frame.*/
- oc_mcenc_search_1mv(_enc->mcenc,mb-_enc->state.mbs,OC_FRAME_GOLD);
- /*We are now ready to do mode decision for this macro block.
- Mode decision is done by exhaustively examining all potential choices.
- Since we use a minimum-quality encoding strategy, this amounts to
- simply selecting the mode which uses the smallest number of bits,
- since the minimum quality will be met in any mode.
- Obviously, doing the motion compensation, fDCT, tokenization, and then
- counting the bits each token uses is computationally expensive.
- Theora's EOB runs can also split the cost of these tokens across
- multiple fragments, and naturally we don't know what the optimal
- choice of Huffman codes will be until we know all the tokens we're
- going to encode in all the fragments.
-
- So we use a simple approach to estimating the bit cost of each mode
- based upon the SAD value of the residual.
- The mathematics behind the technique are outlined by Kim \cite{Kim03},
- but the process is very simple.
- For each quality index and SAD value, we have a table containing the
- average number of bits needed to code a fragment.
- The SAD values are placed into a small number of bins (currently 16).
- The bit counts are obtained by examining actual encoded frames, with
- optimal Huffman codes selected and EOB bits appropriately divided
- among all the blocks they involve.
- A separate QIxSAD table is kept for each mode and color plane.
- It may be possible to combine many of these, but only experimentation
- will tell which ones truly represent the same distribution.
-
- @ARTICLE{Kim03,
- author="Hyun Mun Kim",
- title="Adaptive Rate Control Using Nonlinear Regression",
- journal="IEEE Transactions on Circuits and Systems for Video
- Technology",
- volume=13,
- number=5,
- pages="432--439",
- month="May",
- year=2003
- }*/
- memset(bits,0,sizeof(bits));
- mbintrabits=0;
- /*Find the SAD values for each coded fragment for each possible mode.*/
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb->map[pli][bi];
- frag=_enc->state.frags+fragi;
- efrag=_enc->frinfo+fragi;
- err[OC_MODE_INTRA][mapi]=0;
- for(ci=1;ci<64;ci++){
- err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
- }
- err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
- OC_FRAME_PREV);
- err[OC_MODE_INTER_MV][mapi]=oc_enc_frag_sad(_enc,frag,
- mbinfo->mvs[0][OC_FRAME_PREV][0],mbinfo->mvs[0][OC_FRAME_PREV][1],
- pli,OC_FRAME_PREV);
- err[OC_MODE_INTER_MV_LAST][mapi]=oc_enc_frag_sad(_enc,frag,
- last_mv[0][0],last_mv[0][1],pli,OC_FRAME_PREV);
- err[OC_MODE_INTER_MV_LAST2][mapi]=oc_enc_frag_sad(_enc,frag,
- last_mv[1][0],last_mv[1][1],pli,OC_FRAME_PREV);
- err[OC_MODE_INTER_MV_FOUR][mapi]=oc_enc_frag_sad(_enc,frag,
- bmvs[!!pli][bi][0],bmvs[!!pli][bi][1],pli,OC_FRAME_PREV);
- err[OC_MODE_GOLDEN_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,
- 0,0,pli,OC_FRAME_GOLD);
- err[OC_MODE_GOLDEN_MV][mapi]=oc_enc_frag_sad(_enc,frag,
- mbinfo->mvs[0][OC_FRAME_GOLD][0],mbinfo->mvs[0][OC_FRAME_GOLD][1],
- pli,OC_FRAME_GOLD);
- /*Using these distortion values, estimate the number of bits needed to
- code this fragment in each mode.*/
- qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
- mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
- OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
- qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][0]];
- bits[OC_MODE_INTRA]+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
- OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
- qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
- for(modei=OC_MODE_INTRA+1;modei<OC_NMODES;modei++){
- bits[modei]+=OC_RES_BITRATES[qi][pli][modei][
- OC_MINI(err[modei][mapi]>>6,15)];
- }
- }
- /*Bit costs are stored in the table with extra precision.
- Round them to the nearest whole bit here.*/
- for(modei=0;modei<OC_NMODES;modei++){
- bits[modei]=bits[modei]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
- }
- /*Estimate the cost of coding the label for each mode.
- See comments at oc_mode_scheme_chooser_cost() for a description of the
- method.*/
- for(modei=0;modei<OC_NMODES;modei++){
- bits[modei]+=oc_mode_scheme_chooser_cost(&_enc->mode_scheme_chooser,
- modei);
- }
- /*Add the motion vector bits for each mode that requires them.*/
- mbpmvbitsa=oc_mvbitsa(mbinfo->mvs[0][OC_FRAME_PREV][0],
- mbinfo->mvs[0][OC_FRAME_PREV][1]);
- mbgmvbitsa=oc_mvbitsa(mbinfo->mvs[1][OC_FRAME_GOLD][0],
- mbinfo->mvs[0][OC_FRAME_GOLD][1]);
- mb4mvbitsa=mb4mvbitsb=0;
- for(codedi=0;codedi<ncoded_luma;codedi++){
- mb4mvbitsa=oc_mvbitsa(bmvs[0][coded[codedi]][0],
- bmvs[0][coded[codedi]][1]);
- mb4mvbitsb+=12;
- }
- /*We use the same opportunity cost method of estimating the cost of
- coding the motion vectors with the two different schemes as we do for
- estimating the cost of the mode labels.
- However, because there are only two schemes and they're both pretty
- simple, this can just be done inline.*/
- bits[OC_MODE_INTER_MV]+=OC_MINI(mvbitsa+mbpmvbitsa,mvbitsb+12)-
- OC_MINI(mvbitsa,mvbitsb);
- bits[OC_MODE_GOLDEN_MV]+=OC_MINI(mvbitsa+mbgmvbitsa,mvbitsb+12)-
- OC_MINI(mvbitsa,mvbitsb);
- bits[OC_MODE_INTER_MV_FOUR]+=OC_MINI(mvbitsa+mb4mvbitsa,
- mvbitsb+mb4mvbitsb)-OC_MINI(mvbitsa,mvbitsb);
- /*Finally, pick the mode with the cheapest estimated bit cost.*/
- mb->mode=0;
- for(modei=1;modei<OC_NMODES;modei++)if(bits[modei]<bits[mb->mode]){
- /*Do not select 4MV mode when not all the luma blocks are coded when
- we're in VP3 compatibility mode.*/
- if(_enc->vp3_compatible&&modei==OC_MODE_INTER_MV_FOUR&&ncoded_luma<4){
- continue;
- }
- mb->mode=modei;
- }
-#if defined(OC_BITRATE_STATS)
- /*Remember the error for the mode we selected in each fragment.*/
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- fragi=mb->map[mapi>>2][mapi&3];
- efrag=_enc->frinfo+fragi;
- efrag->eerror=err[mb->mode][mapi];
- }
-#endif
- /*Go back and store the selected qi index corresponding to the selected
- mode in each fragment.*/
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- fragi=mb->map[mapi>>2][mapi&3];
- frag=_enc->state.frags+fragi;
- efrag=_enc->frinfo+fragi;
- efrag->qii=(unsigned char)
- frag_qii[codedi][OC_INTER_FRAME][mb->mode!=0];
- frag->qi=_enc->qis[OC_INTER_FRAME][efrag->qii];
- }
- inter_bits+=bits[mb->mode];
- intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
- oc_mode_scheme_chooser_update(&_enc->mode_scheme_chooser,mb->mode);
- switch(mb->mode){
- case OC_MODE_INTER_MV:{
- mvbitsa+=mbpmvbitsa;
- mvbitsb+=12;
- last_mv[1][0]=last_mv[0][0];
- last_mv[1][1]=last_mv[0][1];
- mbmv[0]=last_mv[0][0]=mbinfo->mvs[0][OC_FRAME_PREV][0];
- mbmv[1]=last_mv[0][1]=mbinfo->mvs[0][OC_FRAME_PREV][1];
- }break;
- case OC_MODE_INTER_MV_LAST:{
- mbmv[0]=last_mv[0][0];
- mbmv[1]=last_mv[0][1];
- }break;
- case OC_MODE_INTER_MV_LAST2:{
- mbmv[0]=last_mv[1][0];
- mbmv[1]=last_mv[1][1];
- last_mv[1][0]=last_mv[0][0];
- last_mv[1][1]=last_mv[0][1];
- last_mv[0][0]=mbmv[0];
- last_mv[0][1]=mbmv[1];
- }break;
- case OC_MODE_INTER_MV_FOUR:{
- mvbitsa+=mb4mvbitsa;
- mvbitsb+=mb4mvbitsb;
- if(ncoded_luma>0){
- /*After 4MV mode, the last MV is the one from the last coded luma
- block.*/
- last_mv[1][0]=last_mv[0][0];
- last_mv[1][1]=last_mv[0][1];
- last_mv[0][0]=bmvs[0][coded[ncoded_luma-1]][0];
- last_mv[0][1]=bmvs[0][coded[ncoded_luma-1]][1];
- }
- }break;
- case OC_MODE_GOLDEN_MV:{
- mvbitsa+=mbgmvbitsa;
- mvbitsb+=12;
- mbmv[0]=mbinfo->mvs[0][OC_FRAME_GOLD][0];
- mbmv[1]=mbinfo->mvs[0][OC_FRAME_GOLD][1];
- }break;
- }
- if(OC_MODE_HAS_MV[mb->mode]){
- /*Special case 4MV mode.
- MVs are stored in bmvs.*/
- if(mb->mode==OC_MODE_INTER_MV_FOUR){
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb->map[pli][bi];
- frag=_enc->state.frags+fragi;
- frag->mbmode=mb->mode;
- frag->mv[0]=bmvs[!!pli][bi][0];
- frag->mv[1]=bmvs[!!pli][bi][1];
- }
- }
- /*For every other mode with a MV, it is stored in mbmv.*/
- else{
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- fragi=mb->map[mapi>>2][mapi&3];
- frag=_enc->state.frags+fragi;
- frag->mbmode=mb->mode;
- frag->mv[0]=mbmv[0];
- frag->mv[1]=mbmv[1];
- }
- }
- }
- /*For modes with no MV, ensure 0,0 is stored in each fragment.*/
- else{
- for(codedi=0;codedi<ncoded;codedi++){
- mapi=coded[codedi];
- fragi=mb->map[mapi>>2][mapi&3];
- frag=_enc->state.frags+fragi;
- frag->mbmode=mb->mode;
- frag->mv[0]=frag->mv[1]=0;
- }
- }
- }
- }
- /*Finally, compare the cost of an INTER frame and an INTRA frame.*/
- if(mvbitsb<mvbitsa){
- _enc->mv_scheme=1;
- inter_bits+=mvbitsb;
- }
- else{
- _enc->mv_scheme=0;
- inter_bits+=mvbitsa;
- }
- inter_bits+=_enc->mode_scheme_chooser.scheme_bits[
- _enc->mode_scheme_chooser.scheme_list[0]];
- /*The easiest way to count the bits needed for coded/not coded fragments is
- to code them.
- We need to do this anyway, might as well do it now.*/
- oggpackB_reset(&_enc->opb_coded_flags);
- inter_bits+=oc_enc_partial_sb_flags_pack(_enc);
- inter_bits+=oc_enc_coded_sb_flags_pack(_enc);
- inter_bits+=oc_enc_coded_block_flags_pack(_enc);
- /*Select the quantizer list for INTER frames.*/
- _enc->state.nqis=_enc->nqis[OC_INTER_FRAME];
- for(qii=0;qii<_enc->state.nqis;qii++){
- _enc->state.qis[qii]=_enc->qis[OC_INTER_FRAME][qii];
- }
- if(intra_bits>inter_bits){
- _enc->est_bits=inter_bits;
- return OC_INTER_FRAME;
- }
- /*All INTRA mode is smaller, but we haven't counted up the cost of all the
- not coded fragments we will now have to code.*/
- uncoded_fragi_end=uncoded_fragi=_enc->state.uncoded_fragis;
- for(pli=0;pli<3;pli++){
- uncoded_fragi_end-=_enc->state.nuncoded_fragis[pli];
- while(uncoded_fragi-->uncoded_fragi_end){
- fragi=*uncoded_fragi;
- frag=_enc->state.frags+fragi;
- /*Assume a very small bit cost for invalid fragments.*/
- if(frag->invalid)intra_bits+=OC_RES_BITRATES[0][pli][OC_MODE_INTRA][0];
- else{
- int eerror;
- eerror=0;
- efrag=_enc->frinfo+fragi;
- for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
-#if defined(OC_BITRATE_STATS)
- efrag->eerror=eerror;
-#endif
- qi=_enc->qis[OC_INTRA_FRAME][0];
- for(qii=1;qii<_enc->nqis[OC_INTRA_FRAME];qii++){
- if(_enc->qis[OC_INTRA_FRAME][qii]<qi&&
- efrag->qi_min[0]<=_enc->qis[OC_INTRA_FRAME][qii]){
- qi=_enc->qis[OC_INTRA_FRAME][qii];
- }
- }
- intra_bits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
- OC_MINI(eerror>>8,15)];
- /*If it turns out INTRA mode was more expensive, we're done.*/
- if(intra_bits>inter_bits){
- _enc->est_bits=inter_bits;
- return OC_INTER_FRAME;
- }
- }
- }
- }
- /*So, we've compared the full cost estimates, and INTRA is still better.
- Code an INTRA frame instead.*/
- oc_enc_mark_all_intra(_enc);
- _enc->est_bits=intra_bits;
- return OC_INTRA_FRAME;
-}
-
/*Performs a motion-compensated fDCT for each fragment coded in a mode other
than INTRA.*/
-static void oc_enc_do_inter_dcts(oc_enc_ctx *_enc){
+void oc_enc_do_inter_dcts(oc_enc_ctx *_enc){
int *coded_fragi;
int *coded_fragi_end;
int pli;
@@ -2355,170 +1321,7 @@
}
}
-static void oc_enc_mb_modes_pack(oc_enc_ctx *_enc){
- const theora_huff_code *codes;
- const int *mode_ranks;
- int *coded_mbi;
- int *coded_mbi_end;
- int scheme;
- scheme=_enc->mode_scheme_chooser.scheme_list[0];
- oggpackB_write(&_enc->opb,scheme,3);
- if(scheme==0){
- int ranks[8];
- int mi;
- /*The numbers associated with each mode in the stream are slightly
- different than what we use in the source.
- The lookup here converts between the two.*/
- for(mi=0;mi<OC_NMODES;mi++){
- ranks[OC_MODE_SCHEMES[6][mi]]=
- _enc->mode_scheme_chooser.scheme0_ranks[mi];
- }
- for(mi=0;mi<OC_NMODES;mi++)oggpackB_write(&_enc->opb,ranks[mi],3);
- }
- codes=_enc->mode_scheme_chooser.mode_codes[scheme];
- mode_ranks=_enc->mode_scheme_chooser.mode_ranks[scheme];
- coded_mbi=_enc->state.coded_mbis;
- coded_mbi_end=coded_mbi+_enc->state.ncoded_mbis;
- for(;coded_mbi<coded_mbi_end;coded_mbi++){
- const theora_huff_code *code;
- oc_mb *mb;
- mb=_enc->state.mbs+*coded_mbi;
- code=codes+mode_ranks[mb->mode];
- oggpackB_write(&_enc->opb,code->pattern,code->nbits);
- }
-}
-static void oc_enc_mv_pack(oc_enc_ctx *_enc,int _dx,int _dy){
- const theora_huff_code *code;
- code=OC_MV_CODES[_enc->mv_scheme]+_dx+31;
- oggpackB_write(&_enc->opb,code->pattern,code->nbits);
- code=OC_MV_CODES[_enc->mv_scheme]+_dy+31;
- oggpackB_write(&_enc->opb,code->pattern,code->nbits);
-}
-
-static void oc_enc_mvs_pack(oc_enc_ctx *_enc){
- int *coded_mbi;
- int *coded_mbi_end;
- oggpackB_write(&_enc->opb,_enc->mv_scheme,1);
- coded_mbi=_enc->state.coded_mbis;
- coded_mbi_end=coded_mbi+_enc->state.ncoded_mbis;
- for(;coded_mbi<coded_mbi_end;coded_mbi++){
- oc_mb *mb;
- oc_mb_enc_info *mbinfo;
- int mbi;
- mbi=*coded_mbi;
- mb=_enc->state.mbs+mbi;
- switch(mb->mode){
- case OC_MODE_INTER_MV:
- case OC_MODE_GOLDEN_MV:{
- int which_frame;
- which_frame=OC_FRAME_FOR_MODE[mb->mode];
- mbinfo=_enc->mbinfo+mbi;
- oc_enc_mv_pack(_enc,mbinfo->mvs[0][which_frame][0],
- mbinfo->mvs[0][which_frame][1]);
- }break;
- case OC_MODE_INTER_MV_FOUR:{
- int bi;
- mbinfo=_enc->mbinfo+mbi;
- for(bi=0;bi<4;bi++){
- int fragi;
- fragi=mb->map[0][bi];
- if(fragi>=0&&_enc->state.frags[fragi].coded){
- oc_enc_mv_pack(_enc,mbinfo->bmvs[bi][0],mbinfo->bmvs[bi][1]);
- }
- }
- }break;
- }
- }
-}
-
-static int oc_enc_deltaframe(oc_enc_ctx *_enc){
- oggpackB_reset(&_enc->opb);
- oc_enc_mark_coded(_enc);
- /*Only proceed if we have some coded blocks.
- No coded blocks -> dropped frame -> 0 byte packet.*/
- if(_enc->state.ncoded_fragis[0]!=0||
- _enc->state.ncoded_fragis[1]!=0||
- _enc->state.ncoded_fragis[2]!=0){
- oc_enc_quant_sel_quality(_enc,0);
- _enc->state.frame_type=oc_enc_choose_mbmodes(_enc);
- if(_enc->state.frame_type==OC_INTER_FRAME)oc_enc_do_inter_dcts(_enc);
- oc_enc_quant_dc(_enc);
- oc_enc_residual_tokenize(_enc);
- oc_enc_frame_header_pack(_enc);
- if(_enc->state.frame_type==OC_INTER_FRAME){
- oggpackB_writecopy(&_enc->opb,
- oggpackB_get_buffer(&_enc->opb_coded_flags),
- oggpackB_bits(&_enc->opb_coded_flags));
- oc_enc_mb_modes_pack(_enc);
- oc_enc_mvs_pack(_enc);
- }
- oc_enc_block_qis_pack(_enc);
- /*Pack the quantized DCT coefficients.*/
- oc_enc_residual_tokens_pack(_enc);
- }
- /*Success: Mark the packet as ready to be flushed.*/
- _enc->packet_state=OC_PACKET_READY;
- return 0;
-}
-
-
-static int oc_enc_init(oc_enc_ctx *_enc,const theora_info *_info){
- int ret;
- /*Initialize the shared encoder/decoder state.*/
- ret=oc_state_init(&_enc->state,_info);
- if(ret<0)return ret;
- _enc->block_coded_flags=_ogg_calloc(_enc->state.nfrags,
- sizeof(_enc->block_coded_flags[0]));
- /*Initialize our packet buffers.*/
- oggpackB_writeinit(&_enc->opb);
- oggpackB_writeinit(&_enc->opb_coded_flags);
- /*Allocate and initialize storage for encoder-specific fragment and macro
- block storage, as well as DCT token storage.*/
- _enc->frinfo=_ogg_calloc(_enc->state.nfrags,
- sizeof(_enc->frinfo[0]));
- _enc->mbinfo=_ogg_calloc(_enc->state.nmbs,sizeof(_enc->mbinfo[0]));
- _enc->dct_tokens=(unsigned char **)oc_malloc_2d(64,
- _enc->state.nfrags,sizeof(_enc->dct_tokens[0][0]));
- _enc->extra_bits=(ogg_uint16_t **)oc_malloc_2d(64,
- _enc->state.nfrags,sizeof(_enc->extra_bits[0][0]));
- oc_enc_init_mbinfo(_enc);
- /*Do one-time mode scheme chooser initialization.*/
- oc_mode_scheme_chooser_init(&_enc->mode_scheme_chooser);
- /*Set the maximum distance between key frames.*/
- _enc->keyframe_frequency_force=1<<_enc->state.info.keyframe_granule_shift;
- /*Map the qi to a multiple of JND values.*/
- _enc->qscale=_info->quality>=63?0.5F:
- 1.5F*OC_POWF(2,0.0625F*(64-_info->quality));
- /*Initialize the motion compensation, high-level importance map, and
- low-level psychovisual model plug-ins.*/
- _enc->mcenc=oc_mcenc_alloc(_enc);
- _enc->impmap=oc_impmap_alloc(_enc);
- _enc->psych=oc_psych_alloc(_enc);
- /*Reset the packet-out state machine.*/
- _enc->packet_state=OC_PACKET_INFO_HDR;
- /*Mark us as not VP3-compatible.*/
- _enc->vp3_compatible=0;
- /*Set the Huffman codes and quantization parameters to the defaults.*/
- memcpy(_enc->huff_codes,OC_VP31_HUFF_CODES,sizeof(_enc->huff_codes));
- oc_enc_set_quant_params(_enc,NULL);
- return 0;
-}
-
-static void oc_enc_clear(oc_enc_ctx *_enc){
- oc_psych_free(_enc->psych);
- oc_impmap_free(_enc->impmap);
- oc_mcenc_free(_enc->mcenc);
- oc_free_2d(_enc->extra_bits);
- oc_free_2d(_enc->dct_tokens);
- _ogg_free(_enc->mbinfo);
- _ogg_free(_enc->frinfo);
- _ogg_free(_enc->block_coded_flags);
- oc_state_clear(&_enc->state);
-}
-
-
-
theora_enc_ctx *theora_encode_alloc(const theora_info *_info){
oc_enc_ctx *enc;
if(_info==NULL)return NULL;
@@ -2571,7 +1374,7 @@
}
_enc->keyframe_frequency_force=OC_MINI(keyframe_frequency_force,
1U<<_enc->state.info.keyframe_granule_shift);
- (*(ogg_uint32_t *)_buf)=_enc->keyframe_frequency_force;
+ *(ogg_uint32_t *)_buf=_enc->keyframe_frequency_force;
return 0;
}break;
case OC_ENCCTL_SET_VP3_COMPATIBLE:{
@@ -2591,27 +1394,57 @@
/*If we have more than 4095 super blocks, VP3's RLE coding might
overflow.
We could overcome this by ensuring we flip the coded/not-coded flags on
- at lease one super block in the frame, but we pick the simple solution
+ at least one super block in the frame, but we pick the simple solution
of just marking the stream incompatible instead.
It's unlikely the old VP3 codec would be able to decode streams at this
resolution in real time in the first place.*/
_enc->state.nsbs>4095){
vp3_compatible=0;
}
- *((int *)_buf)=vp3_compatible;
+ *(int *)_buf=vp3_compatible;
return 0;
}break;
+ case OC_ENCCTL_GET_SPLEVEL_MAX:{
+ if(_enc==NULL||_buf==NULL)return OC_FAULT;
+ if(_buf_sz!=sizeof(int))return OC_EINVAL;
+ /*We can only manipulate speed in the context of a given encoding mode.
+ Ensure one is selected if the user has not already done so.*/
+ if(_enc->set_speed==NULL)oc_enc_enable_default_mode(_enc);
+ *(int *)_buf=_enc->speed_max;
+ return 0;
+ }break;
+ case OC_ENCCTL_SET_SPLEVEL:{
+ int speed;
+ if(_enc==NULL||_buf==NULL)return OC_FAULT;
+ if(_buf_sz!=sizeof(int))return OC_EINVAL;
+ speed=*(int *)_buf;
+ /*We can only manipulate speed in the context of a given encoding mode.
+ Ensure one is selected if the user has not already done so.*/
+ if(_enc->set_speed==NULL)oc_enc_enable_default_mode(_enc);
+ if(speed<0||speed>_enc->speed_max)return OC_EINVAL;
+ (*_enc->set_speed)(_enc,speed);
+ return 0;
+ }break;
+ case OC_ENCCTL_SETUP_VBR:{
+ if(_enc==NULL)return OC_FAULT;
+ if(_buf==NULL&&_buf_sz!=0||_buf!=NULL&&_buf_sz!=sizeof(theora_vbr_cfg)){
+ return OC_EINVAL;
+ }
+ if(_enc->vbr==NULL)_enc->vbr=oc_enc_vbr_alloc(_enc);
+ return oc_enc_vbr_enable(_enc->vbr,(theora_vbr_cfg *)_buf);
+ }break;
default:return OC_IMPL;
}
}
int theora_encode_ycbcr_in(theora_enc_ctx *_enc,theora_ycbcr_buffer _img){
theora_ycbcr_buffer img;
+ int y_avail[3];
int cwidth;
int cheight;
int ret;
int rfi;
- int mbi;
+ int pli;
/*Step 1: validate parameters.*/
if(_enc==NULL||_img==NULL)return OC_FAULT;
if(_enc->packet_state==OC_PACKET_DONE)return OC_EINVAL;
@@ -2627,94 +1460,37 @@
}
/*Flip the input buffer upside down.*/
oc_ycbcr_buffer_flip(img,_img);
- /*Step 2: Update state.*/
+ /*Step 2: Update buffer state.*/
if(_enc->state.ref_frame_idx[OC_FRAME_SELF]>=0){
- /*Right now the reconstructed frame has only the coded blocks in it.
- We either need to copy all the other blocks into it, or copy the
- reconstructed blocks back into the previous frame, whichever is
- faster.*/
+ _enc->state.ref_frame_idx[OC_FRAME_PREV]=
+ _enc->state.ref_frame_idx[OC_FRAME_SELF];
if(_enc->state.frame_type==OC_INTRA_FRAME){
- /*Intra frames always code all fragments, so there is nothing to copy.
- The new frame becomes both the previous and gold reference frames.*/
+ /*The new frame becomes both the previous and gold reference frames.*/
_enc->state.keyframe_num=_enc->state.curframe_num;
_enc->state.ref_frame_idx[OC_FRAME_GOLD]=
- _enc->state.ref_frame_idx[OC_FRAME_PREV]=
_enc->state.ref_frame_idx[OC_FRAME_SELF];
}
- else{
- int *plfragis[3];
- int ncoded;
- int nuncoded;
- int pli;
- ncoded=_enc->state.ncoded_fragis[0]+_enc->state.ncoded_fragis[1]+
- _enc->state.ncoded_fragis[2];
- nuncoded=_enc->state.nfrags-ncoded;
- if(ncoded<nuncoded&&
- _enc->state.ref_frame_idx[OC_FRAME_PREV]!=
- _enc->state.ref_frame_idx[OC_FRAME_GOLD]){
- plfragis[0]=_enc->state.coded_fragis;
- plfragis[1]=plfragis[0]+_enc->state.ncoded_fragis[0];
- plfragis[2]=plfragis[1]+_enc->state.ncoded_fragis[1];
- for(pli=0;pli<3;pli++){
- oc_state_frag_copy(&_enc->state,plfragis[pli],
- _enc->state.ncoded_fragis[pli],OC_FRAME_PREV,OC_FRAME_SELF,pli);
- }
- _enc->state.ref_frame_idx[OC_FRAME_SELF]=
- _enc->state.ref_frame_idx[OC_FRAME_PREV];
- }
- else{
- plfragis[0]=_enc->state.uncoded_fragis-_enc->state.nuncoded_fragis[0];
- plfragis[1]=plfragis[0]-_enc->state.nuncoded_fragis[1];
- plfragis[2]=plfragis[1]-_enc->state.nuncoded_fragis[2];
- for(pli=0;pli<3;pli++){
- oc_state_frag_copy(&_enc->state,plfragis[pli],
- _enc->state.nuncoded_fragis[pli],OC_FRAME_SELF,OC_FRAME_PREV,pli);
- }
- _enc->state.ref_frame_idx[OC_FRAME_PREV]=
- _enc->state.ref_frame_idx[OC_FRAME_SELF];
- }
- }
- /*Filter block edges.*/
- oc_state_loop_filter(&_enc->state,OC_FRAME_PREV);
-#if defined(OC_DUMP_IMAGES)
- oc_state_dump_frame(&_enc->state,OC_FRAME_PREV,"rec");
-#endif
- /*Fill in the borders from the reconstructed version of the last encoded
- frame.*/
- oc_state_borders_fill(&_enc->state,
- _enc->state.ref_frame_idx[OC_FRAME_PREV]);
}
+ /*If no encoding mode has been explicitly enabled by the application,
+ enable the default encoding mode with a default configuration.*/
+ else if(_enc->pipe==NULL)oc_enc_enable_default_mode(_enc);
/*Select a free buffer to use for the reconstructed version of this frame.*/
for(rfi=0;rfi==_enc->state.ref_frame_idx[OC_FRAME_GOLD]||
rfi==_enc->state.ref_frame_idx[OC_FRAME_PREV];rfi++);
_enc->state.ref_frame_idx[OC_FRAME_SELF]=rfi;
- /*Move the motion vector predictors back a frame.*/
- for(mbi=_enc->state.fplanes[0].nsbs<<2;mbi-->0;){
- oc_mb_enc_info *emb;
- emb=_enc->mbinfo+mbi;
- memmove(emb->mvs+1,emb->mvs,2*sizeof(emb->mvs[0]));
- }
_enc->state.curframe_num++;
/*Fill the fragment array with pointers into the user buffer.*/
oc_state_fill_buffer_ptrs(&_enc->state,OC_FRAME_IO,img);
- /*Step 3: Analyze motion.*/
- oc_mcenc_analyze(_enc->mcenc);
- /*Step 4: Compute importance map.*/
- oc_impmap_fill(_enc->impmap,
- _enc->state.info.fps_denominator/(float)_enc->state.info.fps_numerator);
- /*Step 5: Compute coefficient tolerances.*/
- oc_psych_scan(_enc->psych,0);
- /*Step 6: Encode!*/
- if(_enc->state.curframe_num==0||
- _enc->state.curframe_num-_enc->state.keyframe_num>=
- _enc->keyframe_frequency_force){
- ret=oc_enc_keyframe(_enc);
- if(ret<0)return ret;
- }
- else{
- ret=oc_enc_deltaframe(_enc);
- if(ret<0)return ret;
- }
+ /*Reset the encoding pipeline.*/
+ ret=(*_enc->pipe->pipe_start)(_enc->pipe);
+ if(ret<0)return ret;
+ /*Push the image into the pipeline.*/
+ for(pli=0;pli<3;pli++)y_avail[pli]=_img[pli].height;
+ ret=(*_enc->pipe->pipe_proc)(_enc->pipe,y_avail);
+ if(ret<0)return ret;
+ /*Flush the results through.*/
+ ret=(*_enc->pipe->pipe_end)(_enc->pipe);
+ if(ret<0)return ret;
/*Note: All buffer management, etc., that is done after a frame is encoded
is delayed until the next frame is encoded.
This allows for a future API that would let an encoding application
@@ -2730,6 +1506,11 @@
(_enc->state.keyframe_num<<_enc->state.info.keyframe_granule_shift)+
(_enc->state.curframe_num-_enc->state.keyframe_num);
}
+#if defined(OC_DUMP_IMAGES)
+ /*This is done after the granpos update, because that's what it uses to name
+ the output file.*/
+ oc_state_dump_frame(&_enc->state,OC_FRAME_SELF,"rec");
+#endif
return 0;
}
Added: experimental/derf/theora-exp/lib/encvbr.c
===================================================================
--- experimental/derf/theora-exp/lib/encvbr.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/encvbr.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -0,0 +1,1416 @@
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ogg/ogg.h>
+#include "encvbr.h"
+#include "fdct.h"
+
+
+
+/*Computes the cost, in bits, of coding a motion vector with the VLC motion
+   vector codes, rather than the CLC codes, which always use 12 bits.
+  Each component is biased by 31 to form an index into the first set of
+   OC_MV_CODES, and the two code lengths are added together.
+  _dx: The X component of the vector, in half-pel units.
+  _dy: The Y component of the vector, in half-pel units.
+  Return: The number of bits required to store the vector with the VLC codes.*/
+static int oc_mvbitsa(int _dx,int _dy){
+  int dx_nbits;
+  int dy_nbits;
+  dx_nbits=OC_MV_CODES[0][_dx+31].nbits;
+  dy_nbits=OC_MV_CODES[0][_dy+31].nbits;
+  return dx_nbits+dy_nbits;
+}
+
+
+
+/*Select the set of quantizers to use for the current frame for each possible
+ frame type (intra or inter).
+ This does not assign a quantizer to each fragment, as that depends on the
+ quantizer type used and thus is done during mode decision.
+ A DC quantizer index is chosen for each frame type first, and then up to
+ three qi values per frame type are stored in _enc->qis[], with the number
+ chosen stored in _enc->nqis[].
+ The search for the second and third qi values is confined to the configured
+ [qi_min,qi_max] range of the frame type.
+ _enc: The encoder context.
+ _intra_only: If non-zero, only the intra frame type is processed.*/
+static void oc_enc_vbr_quant_sel_quality(oc_enc_ctx *_enc,int _intra_only){
+ unsigned qmax[2][3];
+ int qi_min[2];
+ int qi_max[2];
+ int fti;
+ int qti;
+ int pli;
+ int dc_qi[2];
+ qi_min[0]=_enc->vbr->cfg.kf_qi_min;
+ qi_min[1]=_enc->vbr->cfg.df_qi_min;
+ qi_max[0]=_enc->vbr->cfg.kf_qi_max;
+ qi_max[1]=_enc->vbr->cfg.df_qi_max;
+ /*The first quantizer value is used for DC coefficients.
+ Select one that allows us to meet our quality requirements.*/
+ for(qti=0;qti<1+!_intra_only;qti++)for(pli=0;pli<3;pli++){
+ qmax[qti][pli]=OC_MAXI(2U*_enc->vbr->dc_tol_mins[pli],
+ OC_DC_QUANT_MIN[qti]);
+ }
+ /*For intra frames...(containing just INTRA fragments)*/
+ for(dc_qi[0]=qi_min[0];dc_qi[0]<qi_max[0];dc_qi[0]++){
+ if(_enc->state.dequant_tables[0][0][dc_qi[0]][0]<=qmax[0][0]&&
+ _enc->state.dequant_tables[0][1][dc_qi[0]][0]<=qmax[0][1]&&
+ _enc->state.dequant_tables[0][2][dc_qi[0]][0]<=qmax[0][2]){
+ break;
+ }
+ }
+ /*For inter frames...(containing both INTER and INTRA fragments)*/
+ if(!_intra_only){
+ for(dc_qi[1]=OC_CLAMPI(qi_min[1],dc_qi[0],qi_max[1]);dc_qi[1]<qi_max[1];
+ dc_qi[1]++){
+ if(_enc->state.dequant_tables[1][0][dc_qi[1]][0]<=qmax[1][0]&&
+ _enc->state.dequant_tables[1][1][dc_qi[1]][0]<=qmax[1][1]&&
+ _enc->state.dequant_tables[1][2][dc_qi[1]][0]<=qmax[1][2]){
+ break;
+ }
+ }
+ }
+ /*Now we select a full qi list for each frame type.*/
+ for(fti=0;fti<1+!_intra_only;fti++){
+ oc_fragment_enc_info *efrag;
+ int ncoded_fragis;
+ int nqis[64];
+ int qi;
+ int qi0;
+ int qi1;
+ int qi2;
+ /*Here we count up the number of fragments that can use each qi value.
+ Unless we know this is an intra frame, we don't know what quantizer type
+ will be used for each fragment, so we just count both of them.*/
+ memset(nqis,0,sizeof(nqis));
+ if(fti){
+ int *coded_fragi;
+ int *coded_fragi_end;
+ coded_fragi=_enc->state.coded_fragis;
+ ncoded_fragis=_enc->state.ncoded_fragis[0]+
+ _enc->state.ncoded_fragis[1]+_enc->state.ncoded_fragis[2];
+ coded_fragi_end=coded_fragi+ncoded_fragis;
+ for(;coded_fragi<coded_fragi_end;coded_fragi++){
+ efrag=_enc->frinfo+*coded_fragi;
+ for(qti=0;qti<2;qti++)nqis[efrag->qi_min[qti]]++;
+ }
+ }
+ else{
+ oc_fragment_enc_info *efrag_end;
+ ncoded_fragis=_enc->state.nfrags;
+ efrag=_enc->frinfo;
+ efrag_end=efrag+ncoded_fragis;
+ for(;efrag<efrag_end;efrag++)nqis[efrag->qi_min[0]]++;
+ }
+ /*We'll now choose the qi values that divide the fragments into equally
+ sized groups, or as close as we can make it.
+ We account for the DC coefficients by adding an extra amount to the qi
+ value they require.
+ Since there are usually many more DC coefficients coded than any one AC
+ coefficient, we use 1/8 of the number of fragments, instead of 1/64.*/
+ nqis[dc_qi[fti]]+=(ncoded_fragis<<fti)+7>>3;
+ /*Convert this into a moment table.
+ Afterwards nqis[qi] is the total of the counts at qi and above, so nqis[0]
+ is the grand total.*/
+ for(qi=63;qi-->0;)nqis[qi]+=nqis[qi+1];
+ /*If we have a lower limit on the QI range, promote any fragments with a
+ smaller QI, to ensure they're counted.*/
+ if(qi_min[fti]>0)nqis[qi_min[fti]]=nqis[0];
+ /*Select our first quantizer.*/
+ for(qi0=qi_max[fti]+1;qi0-->qi_min[fti]&&nqis[qi0]<=0;);
+ for(qi1=qi0-1;qi1>=qi_min[fti]&&nqis[qi1]<=nqis[qi0];qi1--);
+ /*Test to make sure there are even two unique quantizers.*/
+ if(qi1>=qi_min[fti]){
+ ogg_int64_t best_metric;
+ ogg_int64_t metric;
+ int best_qi1;
+ int best_qi2;
+ int qii;
+ for(qi2=qi1-1;qi2>=qi_min[fti]&&nqis[qi2]<=nqis[qi1];qi2--);
+ /*Test to make sure there are three unique quantizers.
+ The search above stops at qi_min[fti], so the test must use the same
+ bound: comparing against 0 would accept qi_min[fti]-1 as a third
+ quantizer whenever the lower limit is non-zero.*/
+ if(qi2>=qi_min[fti]){
+ best_metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
+ (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
+ best_qi1=qi1;
+ best_qi2=qi2;
+ for(;nqis[qi1]<nqis[1];qi1--){
+ for(qi2=qi1-1;nqis[qi2]<nqis[0];qi2--){
+ metric=(ogg_int64_t)(nqis[0]-nqis[qi2+1])*
+ (nqis[qi2+1]-nqis[qi1+1])*nqis[qi1+1];
+ if(metric>=best_metric){
+ best_qi1=qi1;
+ best_qi2=qi2;
+ best_metric=metric;
+ }
+ }
+ }
+ _enc->qis[fti][0]=qi0;
+ _enc->qis[fti][1]=best_qi1;
+ _enc->qis[fti][2]=best_qi2;
+ _enc->nqis[fti]=3;
+ }
+ else{
+ best_metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
+ best_qi1=qi1;
+ /*Only look for a better second quantizer while there is still room
+ above the lower limit; entries below qi_min[fti] were not promoted and
+ must not be considered.*/
+ if(qi1>qi_min[fti])for(qi1--;nqis[qi1]<nqis[0];qi1--){
+ metric=(ogg_int64_t)(nqis[0]-nqis[qi1+1])*nqis[qi1+1];
+ if(metric>best_metric){
+ best_qi1=qi1;
+ best_metric=metric;
+ }
+ }
+ _enc->qis[fti][0]=qi0;
+ _enc->qis[fti][1]=best_qi1;
+ _enc->nqis[fti]=2;
+ }
+ /*Right now qis[0] is the largest.
+ We want to use the smallest that is still large enough for our DC
+ coefficients.*/
+ for(qii=1;qii<_enc->nqis[fti];qii++)if(_enc->qis[fti][qii]>=dc_qi[fti]){
+ qi0=_enc->qis[fti][0];
+ _enc->qis[fti][0]=_enc->qis[fti][qii];
+ _enc->qis[fti][qii]=qi0;
+ }
+ }
+ else{
+ _enc->qis[fti][0]=qi0;
+ _enc->nqis[fti]=1;
+ }
+ /*If we're in VP3 compatibility mode, just use the first quantizer.*/
+ if(_enc->vp3_compatible)_enc->nqis[fti]=1;
+ }
+}
+
+/*Forces every fragment in the frame to be coded in OC_MODE_INTRA.
+  For each fragment reachable through the super block maps, this selects a
+   quantizer from the INTRA frame quantizer list and appends the fragment to
+   the coded fragment list (in coded order).
+  The per-plane coded fragment counts are recorded and the per-plane uncoded
+   fragment counts are reset to zero.
+  The coded macro block list is left alone, as it is not used when coding
+   INTRA frames.
+  _enc: The encoder context.*/
+static void oc_enc_vbr_mark_all_intra(oc_enc_ctx *_enc){
+  oc_sb *sb;
+  oc_sb *plane_sb_end;
+  int    nfragis;
+  int    pli;
+  int    qii;
+  /*Make the INTRA frame quantizer list the active one.*/
+  _enc->state.nqis=_enc->nqis[OC_INTRA_FRAME];
+  for(qii=0;qii<_enc->state.nqis;qii++){
+    _enc->state.qis[qii]=_enc->qis[OC_INTRA_FRAME][qii];
+  }
+  nfragis=0;
+  sb=plane_sb_end=_enc->state.sbs;
+  for(pli=0;pli<3;pli++){
+    int plane_start;
+    /*Remember where this plane's entries begin in the coded list.*/
+    plane_start=nfragis;
+    plane_sb_end+=_enc->state.fplanes[pli].nsbs;
+    for(;sb<plane_sb_end;sb++){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++){
+        int bi;
+        if(!(sb->quad_valid&1<<quadi))continue;
+        for(bi=0;bi<4;bi++){
+          oc_fragment_enc_info *efrag;
+          oc_fragment          *frag;
+          int                   fragi;
+          int                   best_qii;
+          fragi=sb->map[quadi][bi];
+          /*Negative map entries do not refer to a fragment.*/
+          if(fragi<0)continue;
+          frag=_enc->state.frags+fragi;
+          efrag=_enc->frinfo+fragi;
+          frag->coded=1;
+          frag->mbmode=OC_MODE_INTRA;
+          /*Prefer the smallest listed qi that is no smaller than the
+             fragment's minimum; index 0 is kept if none of the others
+             qualifies.*/
+          best_qii=0;
+          for(qii=1;qii<_enc->state.nqis;qii++){
+            if(_enc->state.qis[qii]<efrag->qi_min[0])continue;
+            if(_enc->state.qis[best_qii]<efrag->qi_min[0]||
+             _enc->state.qis[qii]<_enc->state.qis[best_qii]){
+              best_qii=qii;
+            }
+          }
+          efrag->qii=(unsigned char)best_qii;
+          frag->qi=_enc->state.qis[best_qii];
+          _enc->state.coded_fragis[nfragis++]=fragi;
+#if defined(OC_BITRATE_STATS)
+          /*Compute the error function used for intra mode fragments.
+            This function can only use information known at mode decision
+             time, and so excludes the DC component.
+            TODO: Separate this out somewhere more useful.*/
+          {
+            int ci;
+            int eerror;
+            eerror=0;
+            for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
+            efrag->eerror=eerror;
+          }
+#endif
+        }
+      }
+    }
+    _enc->state.ncoded_fragis[pli]=nfragis-plane_start;
+    _enc->state.nuncoded_fragis[pli]=0;
+  }
+  _enc->ncoded_frags=nfragis;
+}
+
+
+
+/*Quantize and predict the DC coefficients.
+ This is done in a separate step because the prediction of DC coefficients
+ occurs in image order, not in the Hilbert-curve order, unlike the rest of
+ the encoding process.
+ Every coded fragment is visited plane by plane, row by row.
+ Its quantized DC residual (relative to the prediction) replaces
+ dct_coeffs[0] in the encoder fragment info, and the sum of the residual and
+ the prediction is stored in the fragment's dc field and remembered as the
+ last DC value for the reference frame its mode uses.
+ The DC quantizer is always taken from the first active qi,
+ _enc->state.qis[0].
+ _enc: The encoder context.*/
+static void oc_enc_vbr_quant_dc(oc_enc_ctx *_enc){
+ oc_fragment_enc_info *efrag;
+ oc_fragment *frag;
+ int pli;
+ frag=_enc->state.frags;
+ efrag=_enc->frinfo;
+ for(pli=0;pli<3;pli++){
+ oc_fragment_plane *fplane;
+ unsigned fquant;
+ unsigned iquant;
+ int pred_last[3];
+ int fragx;
+ int fragy;
+ pred_last[OC_FRAME_GOLD]=0;
+ pred_last[OC_FRAME_PREV]=0;
+ pred_last[OC_FRAME_SELF]=0;
+ fplane=_enc->state.fplanes+pli;
+ for(fragy=0;fragy<fplane->nvfrags;fragy++){
+ for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++,efrag++){
+ int qc_pred;
+ int qc;
+ if(!frag->coded)continue;
+ qc_pred=oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
+ /*Fragments outside the displayable region must still be coded in key
+ frames.
+ To minimize wasted bits, just use the predicted DC value.
+ TODO: We might do a better job in the lower-left hand corner by
+ propagating over the DC value of the first actually coded fragment,
+ but for the moment this is not done.*/
+ if(frag->invalid)qc=0;
+ else{
+ int c;
+ int c_abs;
+ int qti;
+ /*We now center the DC coefficient range around the predicted value
+ and perform token bits optimization based on the HVS-determined
+ tolerance range.
+ For more details, see oc_enc_vbr_frag_quant_tokenize().
+ In short: a residual within the tolerance is coded as zero.
+ Otherwise qc_max is the rounded quantized magnitude, qc_min is found by
+ stepping down from it while the reconstruction stays within the
+ tolerance, and the value coded is the one closest to qc_max in the same
+ token category as qc_min.*/
+ qti=frag->mbmode!=OC_MODE_INTRA;
+ iquant=_enc->state.dequant_tables[qti][pli][_enc->state.qis[0]][0];
+ c=efrag->dct_coeffs[0]-qc_pred*iquant;
+ c_abs=abs(c);
+ if(c_abs<=efrag->tols[0])qc=0;
+ else{
+ int qc_signed[2];
+ int qc_max;
+ int qc_min;
+ int qc_offs;
+ int c_sign;
+ int c_min;
+ int c_recon;
+ int cati;
+ fquant=_enc->enquant_tables[qti][pli][_enc->state.qis[0]][0];
+ qc_max=(ogg_int32_t)c_abs*fquant+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
+ c_sign=c<0;
+ c_recon=(qc_max-1)*iquant;
+ c_min=OC_MAXI(0,c_abs-efrag->tols[0]);
+ for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=iquant;
+ if(qc_min<3+OC_NDCT_VAL_CAT2_SIZE)qc=qc_min;
+ else{
+ qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
+ for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
+ cati++){
+ qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
+ }
+ qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
+ }
+ qc_signed[0]=qc;
+ qc_signed[1]=-qc;
+ qc=qc_signed[c_sign];
+ }
+ }
+ pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc=qc+qc_pred;
+ efrag->dct_coeffs[0]=(ogg_int16_t)qc;
+ }
+ }
+ }
+}
+
+/*Quantize and tokenize the given fragment.
+ The DC coefficient in _efrag->dct_coeffs[0] must already be quantized; it is
+ only tokenized here.
+ Each AC coefficient is quantized to a value whose token costs no more than
+ that of the smallest value within the fragment's tolerance, and tokens and
+ extra bits are appended to the per-coefficient lists in _enc.
+ _enc: The encoder context whose token lists are appended to.
+ _efrag: The encoder information for the fragment to quantize.
+ _qcoeffs: Returns the quantized AC coefficients, in zig-zag order.
+ Entry 0 and any entry following a CAT1/CAT2 run/value token are
+ written as documented below; entry 0 (DC) is not written here.
+ _fquant: The forward quantization matrix to use.
+ _iquant: The inverse quantization matrix to use.
+ Return: The number of coefficients before any final zero run.*/
+static int oc_enc_vbr_frag_quant_tokenize(oc_enc_ctx *_enc,
+ oc_fragment_enc_info *_efrag,ogg_int16_t _qcoeffs[64],
+ const ogg_uint16_t _fquant[64],const ogg_uint16_t _iquant[64]){
+ int zzi;
+ int zrun;
+ int qc;
+ int qc_offs;
+ int c_sign;
+ int cati;
+ int tli;
+ /*The DC coefficient is already quantized (it had to be for DC prediction).
+ Here we just tokenize it.*/
+ if(_efrag->dct_coeffs[0]){
+ qc=abs(_efrag->dct_coeffs[0]);
+ c_sign=_efrag->dct_coeffs[0]<0;
+ switch(qc){
+ case 1:{
+ _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+ (unsigned char)(OC_ONE_TOKEN+c_sign);
+ }break;
+ case 2:{
+ _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+ (unsigned char)(OC_TWO_TOKEN+c_sign);
+ }break;
+ default:{
+ if(qc-3<OC_NDCT_VAL_CAT2_SIZE){
+ _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+ (unsigned char)(OC_DCT_VAL_CAT2+qc-3);
+ _enc->extra_bits[0][_enc->nextra_bits[0]++]=(ogg_uint16_t)c_sign;
+ }
+ else{
+ qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
+ /*Bound the category search the same way the AC path below does, so
+ an unexpectedly large DC value cannot index past the end of
+ OC_DCT_VAL_CAT_SIZES.*/
+ for(cati=0;cati<5&&qc>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];cati++){
+ qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
+ }
+ _enc->dct_tokens[0][_enc->ndct_tokens[0]++]=
+ (unsigned char)(OC_DCT_VAL_CAT3+cati);
+ _enc->extra_bits[0][_enc->nextra_bits[0]++]=
+ (ogg_uint16_t)((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
+ }
+ }
+ }
+ zrun=0;
+ }
+ else zrun=1;
+ /*Now we quantize and tokenize each AC coefficient.*/
+ for(zzi=1;zzi<64;zzi++){
+ int qc_signed[2];
+ int qc_max;
+ int qc_min;
+ int c_sign;
+ int c_abs;
+ int c_min;
+ int c_recon;
+ int ci;
+ ci=OC_FZIG_ZAG[zzi];
+ c_abs=abs(_efrag->dct_coeffs[ci]);
+ /*Best case: we can encode this as a zero.*/
+ if(c_abs<=_efrag->tols[ci]){
+ zrun++;
+ _qcoeffs[zzi]=0;
+ }
+ else{
+ c_sign=_efrag->dct_coeffs[ci]<0;
+ /*qc_max is the most accurate quantized value.
+ This is the largest possible (absolute) value we will use.*/
+ qc_max=(ogg_int32_t)c_abs*_fquant[ci]+OC_FQUANT_ROUND>>OC_FQUANT_SHIFT;
+ /*qc_min is the smallest possible (by absolute value) quantized value
+ whose dequantized value is within the HVS-determined tolerance
+ range.*/
+ /*TODO: qc_min could be computed by a division (we do not want to allow
+ the rounding errors that are possible with the mul+shift quantization
+ used for qc_max), which would allow qc_max to be calculated only if
+ needed below.
+ Is this faster?
+ Who knows.*/
+ c_recon=(qc_max-1)*_iquant[ci];
+ c_min=c_abs-_efrag->tols[ci];
+ for(qc_min=qc_max;c_recon>=c_min;qc_min--)c_recon-=_iquant[ci];
+ /*We now proceed to find a token that is as close to qc_max as possible,
+ but does not use any more bits than would be required for qc_min.
+ The general assumption we make is that encoding a value closer to 0
+ always uses fewer bits.
+ qc_min can still reach 0 here despite the test above, if the quantizer
+ value is larger than the tolerance (which can happen for very small
+ tolerances; the quantizer value has a minimum it cannot go below).*/
+ if(qc_min==0){
+ zrun++;
+ _qcoeffs[zzi]=0;
+ }
+ else{
+ /*If we have an outstanding zero run, code it now.*/
+ if(zrun>0){
+ /*The zero run tokens appear on the list for the first zero in the
+ run.*/
+ tli=zzi-zrun;
+ /*Second assumption: coding a combined run/value token always uses
+ fewer bits than coding them separately.*/
+ /*CAT1 run/value tokens: the value is 1.*/
+ if(qc_min==1&&zrun<=17){
+ if(zrun<=5){
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ (unsigned char)(OC_DCT_RUN_CAT1A+(zrun-1));
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)c_sign;
+ }
+ else if(zrun<=9){
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_RUN_CAT1B;
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)((c_sign<<2)+zrun-6);
+ }
+ else{
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_RUN_CAT1C;
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)((c_sign<<3)+zrun-10);
+ }
+ qc_signed[0]=1;
+ qc_signed[1]=-1;
+ _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+ zrun=0;
+ /*Skip coding the DCT value below.*/
+ continue;
+ }
+ /*CAT2 run/value tokens: the value is 2-3.*/
+ else if(qc_min<=3&&zrun<=3){
+ if(zrun==1){
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_RUN_CAT2A;
+ qc=OC_MINI(3,qc_max);
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)((c_sign<<1)+qc-2);
+ }
+ else{
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_RUN_CAT2B;
+ qc=OC_MINI(3,qc_max);
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)((c_sign<<2)+(qc-2<<1)+zrun-2);
+ }
+ qc_signed[0]=qc;
+ qc_signed[1]=-qc;
+ _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+ zrun=0;
+ /*Skip coding the DCT value below.*/
+ continue;
+ }
+ /*The run is too long or the quantized value too large: code them
+ separately.*/
+ else{
+ /*This is stupid: non-short ZRL tokens are never used for run
+ values less than 9, but codewords are reserved for them,
+ wasting bits.
+ Yes, yes, this would've meant a non-constant number of extra
+ bits for this token, but even so.*/
+ if(zrun<=8){
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_SHORT_ZRL_TOKEN;
+ }
+ else{
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=
+ OC_DCT_ZRL_TOKEN;
+ }
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=
+ (ogg_uint16_t)(zrun-1);
+ zrun=0;
+ }
+ }
+ /*No zero run, or the run and the qc value are being coded
+ separately.
+ The signed values for the ONE and TWO tokens are formed by
+ subtraction, which avoids left-shifting a negative number (undefined
+ behavior in C) while producing the same +/-1 and +/-2 results.*/
+ switch(qc_min){
+ case 1:{
+ _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+ (unsigned char)(OC_ONE_TOKEN+c_sign);
+ _qcoeffs[zzi]=(ogg_int16_t)(1-(c_sign<<1));
+ }break;
+ case 2:{
+ _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+ (unsigned char)(OC_TWO_TOKEN+c_sign);
+ _qcoeffs[zzi]=(ogg_int16_t)(2-(c_sign<<2));
+ }break;
+ default:{
+ if(qc_min-3<OC_NDCT_VAL_CAT2_SIZE){
+ _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+ (unsigned char)(OC_DCT_VAL_CAT2+qc_min-3);
+ _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=
+ (ogg_uint16_t)c_sign;
+ qc_signed[0]=qc_min;
+ qc_signed[1]=-qc_min;
+ _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+ }
+ else{
+ qc_offs=3+OC_NDCT_VAL_CAT2_SIZE;
+ for(cati=0;cati<5&&qc_min>=qc_offs+OC_DCT_VAL_CAT_SIZES[cati];
+ cati++){
+ qc_offs+=OC_DCT_VAL_CAT_SIZES[cati];
+ }
+ /*qc_min can be encoded in this category.
+ Since all DCT values in the category use the same number of
+ bits, we encode the closest value to qc_max.
+ This is either qc_max itself, if it is in the category's
+ range, or the largest value in the category.*/
+ qc=OC_MINI(qc_offs+OC_DCT_VAL_CAT_SIZES[cati]-1,qc_max);
+ qc_signed[0]=qc;
+ qc_signed[1]=-qc;
+ _qcoeffs[zzi]=(ogg_int16_t)qc_signed[c_sign];
+ _enc->dct_tokens[zzi][_enc->ndct_tokens[zzi]++]=
+ (unsigned char)(OC_DCT_VAL_CAT3+cati);
+ _enc->extra_bits[zzi][_enc->nextra_bits[zzi]++]=(ogg_uint16_t)
+ ((c_sign<<OC_DCT_VAL_CAT_SHIFTS[cati])+qc-qc_offs);
+ }
+ }
+ }
+ }
+ }
+ }
+ /*If there's a trailing zero run, code an EOB token.*/
+ if(zrun>0){
+ int old_tok;
+ int toki;
+ int ebi;
+ tli=64-zrun;
+ toki=_enc->ndct_tokens[tli]-1;
+ if(toki>=0)old_tok=_enc->dct_tokens[tli][toki];
+ else old_tok=-1;
+ /*Try to extend an EOB run.*/
+ switch(old_tok){
+ case OC_DCT_EOB1_TOKEN:
+ case OC_DCT_EOB2_TOKEN:{
+ _enc->dct_tokens[tli][toki]++;
+ }break;
+ case OC_DCT_EOB3_TOKEN:{
+ _enc->dct_tokens[tli][toki]++;
+ _enc->extra_bits[tli][_enc->nextra_bits[tli]++]=0;
+ }break;
+ case OC_DCT_REPEAT_RUN0_TOKEN:{
+ ebi=_enc->nextra_bits[tli]-1;
+ if(_enc->extra_bits[tli][ebi]<3)_enc->extra_bits[tli][ebi]++;
+ else{
+ _enc->dct_tokens[tli][toki]++;
+ _enc->extra_bits[tli][ebi]=0;
+ }
+ }break;
+ case OC_DCT_REPEAT_RUN1_TOKEN:{
+ ebi=_enc->nextra_bits[tli]-1;
+ if(_enc->extra_bits[tli][ebi]<7)_enc->extra_bits[tli][ebi]++;
+ else{
+ _enc->dct_tokens[tli][toki]++;
+ _enc->extra_bits[tli][ebi]=0;
+ }
+ }break;
+ case OC_DCT_REPEAT_RUN2_TOKEN:{
+ ebi=_enc->nextra_bits[tli]-1;
+ if(_enc->extra_bits[tli][ebi]<15)_enc->extra_bits[tli][ebi]++;
+ else{
+ _enc->dct_tokens[tli][toki]++;
+ /*Again stupid: we could encode runs up to 4127, but inexplicably
+ they don't subtract the bottom of the range here, so we can only
+ go to 4095 (unless we want to change the spec to deal with
+ wrap-around).*/
+ _enc->extra_bits[tli][ebi]=32;
+ }
+ }break;
+ case OC_DCT_REPEAT_RUN3_TOKEN:{
+ ebi=_enc->nextra_bits[tli]-1;
+ if(_enc->extra_bits[tli][ebi]<4095){
+ _enc->extra_bits[tli][ebi]++;
+ break;
+ }
+ /*else fall through.*/
+ }
+ /*Start a new EOB run.*/
+ default:{
+ _enc->dct_tokens[tli][_enc->ndct_tokens[tli]++]=OC_DCT_EOB1_TOKEN;
+ }
+ }
+ }
+ /*Return the number of coefficients before the final zero run.*/
+ return 64-zrun;
+}
+/*Quantizes, tokenizes and reconstructs every coded fragment, plane by plane.
+ The per-coefficient token and extra bits lists are cleared first, and the
+ length of each token list at the start of each plane is recorded in
+ _enc->dct_token_offs[pli].
+ Afterwards, wherever the token list for one coefficient index begins with an
+ EOB run and the nearest earlier list that still has tokens ends with one,
+ the two runs are merged if their combined length is less than 4096.
+ _enc: The encoder context.*/
+static void oc_enc_vbr_residual_tokenize(oc_enc_ctx *_enc){
+ int *coded_fragi;
+ int *coded_fragi_end;
+ int pli;
+ int zzi;
+ /*Clear any existing DCT tokens.*/
+ for(zzi=0;zzi<64;zzi++){
+ _enc->ndct_tokens[zzi]=_enc->nextra_bits[zzi]=0;
+ _enc->extra_bits_offs[zzi]=0;
+ }
+ coded_fragi_end=coded_fragi=_enc->state.coded_fragis;
+ for(pli=0;pli<3;pli++){
+ memcpy(_enc->dct_token_offs[pli],_enc->ndct_tokens,
+ sizeof(_enc->dct_token_offs[pli]));
+ coded_fragi_end+=_enc->state.ncoded_fragis[pli];
+ for(;coded_fragi<coded_fragi_end;coded_fragi++){
+ oc_quant_table *iquants;
+ oc_fragment *frag;
+ oc_fragment_enc_info *efrag;
+ ogg_int16_t qcoeffs[64];
+ int fragi;
+ int qti;
+ int nnzc;
+ fragi=*coded_fragi;
+ frag=_enc->state.frags+fragi;
+ efrag=_enc->frinfo+fragi;
+ qti=frag->mbmode!=OC_MODE_INTRA;
+ iquants=_enc->state.dequant_tables[qti][pli];
+ nnzc=oc_enc_vbr_frag_quant_tokenize(_enc,efrag,qcoeffs,
+ _enc->enquant_tables[qti][pli][frag->qi],iquants[frag->qi]);
+ /*While we're here and things are in cache, reconstruct the quantized
+ fragment.
+ The DC quantizer passed in comes from the first active qi, while the AC
+ quantizers come from the fragment's own qi.*/
+ oc_state_frag_recon(&_enc->state,frag,pli,qcoeffs,nnzc,nnzc,
+ iquants[_enc->state.qis[0]][0],iquants[frag->qi]);
+ }
+ }
+ /*Merge the final EOB run of one coefficient list with the start of the
+ next, if possible.*/
+ for(zzi=1;zzi<64;zzi++){
+ static const int OC_EOB_RANGE[OC_NDCT_EOB_TOKEN_MAX]={1,1,1,4,8,16,4096};
+ static const int OC_EOB_OFFS[OC_NDCT_EOB_TOKEN_MAX]={1,2,3,4,8,16,0};
+ int old_tok1;
+ int old_tok2;
+ int old_eb1;
+ int old_eb2;
+ int new_tok;
+ int toki;
+ int zzj;
+ int ebi;
+ int runl;
+ /*Make sure this coefficient has tokens at all.*/
+ if(_enc->ndct_tokens[zzi]<=0)continue;
+ /*Ensure the first token is an EOB run.*/
+ old_tok2=_enc->dct_tokens[zzi][0];
+ if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue;
+ /*Search for a previous coefficient that has any tokens at all.
+ A list whose tokens were all merged away by an earlier iteration (its
+ offset has caught up with its length) is skipped.*/
+ old_tok1=OC_NDCT_EOB_TOKEN_MAX;
+ zzj=zzi-1;
+ do{
+ toki=_enc->ndct_tokens[zzj]-1;
+ if(toki>=_enc->dct_token_offs[0][zzj]){
+ old_tok1=_enc->dct_tokens[zzj][toki];
+ break;
+ }
+ }
+ while(zzj-->0);
+ /*Ensure its last token was an EOB run.*/
+ if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue;
+ /*Pull off the associated extra bits, if any, and decode the runs.*/
+ ebi=_enc->nextra_bits[zzj];
+ old_eb1=OC_DCT_TOKEN_EXTRA_BITS[old_tok1]?_enc->extra_bits[zzj][--ebi]:0;
+ old_eb2=OC_DCT_TOKEN_EXTRA_BITS[old_tok2]?_enc->extra_bits[zzi][0]:0;
+ runl=OC_EOB_OFFS[old_tok1]+old_eb1+OC_EOB_OFFS[old_tok2]+old_eb2;
+ /*We can't possibly combine these into one run.
+ It might be possible to split them more optimally, but we'll just leave
+ them as is.*/
+ if(runl>=4096)continue;
+ /*We CAN combine them into one run.*/
+ for(new_tok=OC_DCT_EOB1_TOKEN;
+ runl-OC_EOB_OFFS[new_tok]>=OC_EOB_RANGE[new_tok];new_tok++);
+ /*toki is always initialized.
+ If your compiler thinks otherwise, it is dumb.*/
+ _enc->dct_tokens[zzj][toki]=(unsigned char)new_tok;
+ /*Update the two token lists.
+ The merged run replaces the last token of the earlier list, and the first
+ token (and extra bits entry, if it had one) of the later list is skipped
+ by advancing its offsets.*/
+ if(OC_DCT_TOKEN_EXTRA_BITS[new_tok]){
+ _enc->extra_bits[zzj][ebi++]=(ogg_uint16_t)(
+ runl-OC_EOB_OFFS[new_tok]);
+ }
+ _enc->nextra_bits[zzj]=ebi;
+ _enc->dct_token_offs[0][zzi]++;
+ /*Note: We don't bother to update the offsets for planes 1 and 2 if
+ planes 0 or 1 don't have any tokens.
+ This turns out not to matter due to the way we use the offsets later.*/
+ if(OC_DCT_TOKEN_EXTRA_BITS[old_tok2])_enc->extra_bits_offs[zzi]++;
+ }
+}
+
+/*Marks each fragment as coded or not, based on the coefficient-level
+ thresholds computed in the psychovisual stage.
+ The MB modes of the fragments are not set, as they will be computed in
+ oc_enc_choose_mbmodes().
+ This also builds up the coded fragment and uncoded fragment lists, along
+ with the list of per-block coded flags for partially coded super blocks.
+ The coded MB list is not built up.
+ That is done during mode decision.
+ _enc: The encoder context.*/
+static void oc_enc_mark_coded(oc_enc_ctx *_enc){
+ oc_sb *sb;
+ oc_sb *sb_end;
+ int pli;
+ int bli;
+ int ncoded_fragis;
+ int prev_ncoded_fragis;
+ int nuncoded_fragis;
+ int prev_nuncoded_fragis;
+ _enc->nblock_coded_flags=bli=0;
+ prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0;
+ sb=sb_end=_enc->state.sbs;
+ for(pli=0;pli<3;pli++){
+ const oc_fragment_plane *fplane;
+ int ystride;
+ int prev_refi;
+ fplane=_enc->state.fplanes+pli;
+ sb_end+=fplane->nsbs;
+ prev_refi=_enc->state.ref_frame_idx[OC_FRAME_PREV];
+ ystride=_enc->state.ref_frame_bufs[prev_refi][pli].ystride;
+ for(;sb<sb_end;sb++){
+ int quadi;
+ sb->coded_fully=1;
+ sb->coded_partially=0;
+ for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
+ int bi;
+ for(bi=0;bi<4;bi++){
+ int fragi;
+ fragi=sb->map[quadi][bi];
+ if(fragi>=0){
+ oc_fragment *frag;
+ int flag;
+ frag=_enc->state.frags+fragi;
+ if(frag->invalid){
+ frag->coded=0;
+ *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
+ }
+ else{
+ oc_fragment_enc_info *efrag;
+ ogg_int16_t dct_buf[64];
+ int ci;
+ /*Check to see if the fragment can be skipped.
+ It is assumed that a skipped fragment always takes fewer bits
+ than a coded fragment, though this may not necessarily be true.
+ A single skipped fragment could take up to 34 bits to encode
+ its location in the RLE scheme Theora uses.*/
+ oc_frag_intra_fdct(frag,dct_buf,ystride,prev_refi);
+ efrag=_enc->frinfo+fragi;
+ /*The comparison against OC_DC_QUANT_MIN and OC_AC_QUANT_MIN
+ ensures we mark a fragment as skipped if it would be quantized
+ to all zeros in OC_MODE_INTER_NOMV.
+ These minimum quantizers represent the maximum quality the
+ format is capable of, and can be larger than our tolerances.
+ The minimum for INTER modes is twice the minimum for INTRA
+ modes, so technically if the tolerances are below this
+ threshold, we might be able to do a better job representing
+ this fragment by coding it in INTRA mode.
+ But the number of extra bits required to do that would be
+ ridiculous, so we give up our devotion to minimum quality just
+ this once.
+
+ Note: OC_DC_QUANT_MIN[0] should actually be
+ OC_DC_QUANT_MIN[1]>>1, but in this case those are
+ equivalent.
+
+ ci ends up at 64 only if the DC difference and every AC
+ difference were within their thresholds.*/
+ ci=0;
+ if((unsigned)abs(dct_buf[0]-efrag->dct_coeffs[0])<=
+ OC_MAXI(efrag->tols[0],OC_DC_QUANT_MIN[0])){
+ for(ci++;ci<64;ci++){
+ if((unsigned)abs(dct_buf[ci]-efrag->dct_coeffs[ci])>
+ OC_MAXI(efrag->tols[ci],OC_AC_QUANT_MIN[0])){
+ break;
+ }
+ }
+ }
+ if(ci>=64){
+ frag->coded=0;
+ *(_enc->state.uncoded_fragis-++nuncoded_fragis)=fragi;
+ }
+ else{
+ frag->coded=1;
+ _enc->state.coded_fragis[ncoded_fragis++]=fragi;
+ }
+ }
+ flag=frag->coded;
+ sb->coded_fully&=flag;
+ sb->coded_partially|=flag;
+ _enc->block_coded_flags[bli++]=(char)flag;
+ }
+ }
+ }
+ /*If this is a partially coded super block, keep the entries just added
+ to the code block flag list.*/
+ if(!sb->coded_fully&&sb->coded_partially){
+ _enc->nblock_coded_flags=bli;
+ }
+ /*Otherwise, discard these entries from the list, as they are
+ implicit.*/
+ else{
+ sb->coded_partially=0;
+ bli=_enc->nblock_coded_flags;
+ }
+ }
+ _enc->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+ prev_ncoded_fragis=ncoded_fragis;
+ _enc->state.nuncoded_fragis[pli]=nuncoded_fragis-prev_nuncoded_fragis;
+ prev_nuncoded_fragis=nuncoded_fragis;
+ }
+ _enc->ncoded_frags=ncoded_fragis;
+}
+
+/*Selects an appropriate coding mode for each macro block.
+ A mode is chosen for the macro blocks with at least one coded fragment.
+ A bit cost estimate for coding the frame with the selected modes is made,
+ and a similar estimate is made for coding the frame as a key frame.
+ These estimates are used to select the optimal frame type.
+ Return: The frame type to encode with: OC_INTER_FRAME or OC_INTRA_FRAME.*/
+static int oc_enc_choose_mbmodes(oc_enc_ctx *_enc){
+ oc_set_chroma_mvs_func set_chroma_mvs;
+ oc_fragment_enc_info *efrag;
+ oc_fragment *frag;
+ oc_mb *mb;
+ oc_mb_enc_info *mbinfo;
+ char last_mv[2][2];
+ int *uncoded_fragi;
+ int *uncoded_fragi_end;
+ int best_qii;
+ int qii;
+ int qi;
+ int pli;
+ int mbi;
+ int fragi;
+ int ci;
+ int nmbs;
+ int mvbitsa;
+ int mvbitsb;
+ int intra_bits;
+ int inter_bits;
+ nmbs=_enc->state.nmbs;
+ set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
+ oc_mode_scheme_chooser_reset(&_enc->mode_scheme_chooser);
+ memset(last_mv,0,sizeof(last_mv));
+ mbinfo=_enc->mbinfo;
+ mvbitsa=mvbitsb=0;
+ inter_bits=2+7*_enc->state.nqis-(_enc->state.nqis==3);
+ intra_bits=inter_bits+3;
+ _enc->state.ncoded_mbis=0;
+ for(mbi=0;mbi<nmbs;mbi++){
+ mb=_enc->state.mbs+mbi;
+ if(mb->mode!=OC_MODE_INVALID){
+ oc_fragment_enc_info *efrag;
+ char bmvs[2][4][2];
+ char mbmv[2];
+ int err[OC_NMODES][12];
+ int bits[OC_NMODES];
+ int coded[13];
+ int frag_qii[12][2][2];
+ int ncoded;
+ int ncoded_luma;
+ int mapii;
+ int mapi;
+ int modei;
+ int codedi;
+ int mbintrabits;
+ int mbpmvbitsa;
+ int mbgmvbitsa;
+ int mb4mvbitsa;
+ int mb4mvbitsb;
+ int fti;
+ int qti;
+ int bi;
+ mbinfo=_enc->mbinfo+mbi;
+ /*Build up a list of coded fragments.*/
+ ncoded=0;
+ for(mapii=0;mapii<OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];mapii++){
+ mapi=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt][mapii];
+ fragi=mb->map[mapi>>2][mapi&3];
+ if(fragi>=0&&_enc->state.frags[fragi].coded)coded[ncoded++]=mapi;
+ }
+ /*If we don't find any, mark this MB not coded and move on.*/
+ if(ncoded<=0){
+ mb->mode=OC_MODE_NOT_CODED;
+ /*Don't bother to do a MV search against the golden frame.
+ Just re-use the last vector, which should match well since the
+ contents of the MB haven't changed much.*/
+ mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[1][OC_FRAME_GOLD][0];
+ mbinfo->mvs[0][OC_FRAME_GOLD][1]=mbinfo->mvs[1][OC_FRAME_GOLD][1];
+ continue;
+ }
+ /*Count the number of coded blocks that are luma blocks, and replace the
+ block MVs for not-coded blocks with (0,0).*/
+ memcpy(bmvs[0],mbinfo->bmvs,sizeof(bmvs[0]));
+ /*Mark the end of the list so we don't go past it below.*/
+ coded[ncoded]=-1;
+ for(mapi=ncoded_luma=0;mapi<4;mapi++){
+ if(coded[ncoded_luma]==mapi)ncoded_luma++;
+ else bmvs[0][mapi][0]=bmvs[0][mapi][1]=0;
+ }
+ /*Select a qi value for each coded fragment for each frame type and
+ quantizer type.*/
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ efrag=_enc->frinfo+mb->map[mapi>>2][mapi&3];
+ for(fti=0;fti<2;fti++)for(qti=0;qti<=fti;qti++){
+ best_qii=0;
+ for(qii=1;qii<_enc->nqis[fti];qii++){
+ if(efrag->qi_min[qti]<=_enc->qis[fti][qii]&&
+ (_enc->qis[fti][qii]<_enc->qis[fti][best_qii]||
+ _enc->qis[fti][best_qii]<efrag->qi_min[qti])){
+ best_qii=qii;
+ }
+ }
+ frag_qii[codedi][fti][qti]=best_qii;
+ }
+ }
+ /*Special case: If no luma blocks are coded, but some chroma blocks are,
+ then the macro block defaults to OC_MODE_INTER_NOMV, and no mode need
+ be explicitly coded for it.*/
+ if(ncoded_luma<=0){
+ mb->mode=OC_MODE_NOT_CODED;
+ /*Don't bother to do a MV search against the golden frame.*/
+ mbinfo->mvs[0][OC_FRAME_GOLD][0]=mbinfo->mvs[0][OC_FRAME_GOLD][1]=0;
+ /*We do collect bitrate stats for frame type decision.*/
+ mbintrabits=bits[OC_MODE_INTER_NOMV]=0;
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ pli=mapi>>2;
+ fragi=mb->map[pli][mapi&3];
+ frag=_enc->state.frags+fragi;
+ efrag=_enc->frinfo+fragi;
+ /*Set the MB mode and MV in the fragment.*/
+ frag->mbmode=OC_MODE_INTER_NOMV;
+ frag->mv[0]=frag->mv[1]=0;
+ /*Calculate the bitrate estimates.*/
+ err[OC_MODE_INTRA][mapi]=0;
+ for(ci=1;ci<64;ci++){
+ err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
+ }
+ err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
+ OC_FRAME_PREV);
+ qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
+ mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+ OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
+ qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
+ bits[OC_MODE_INTER_NOMV]+=OC_RES_BITRATES[qi][pli][
+ OC_MODE_INTER_NOMV][OC_MINI(err[OC_MODE_INTER_NOMV][mapi]>>6,15)];
+ /*Also mark this fragment with the selected INTER qi.
+ It will be reset if we eventually code this as an INTRA frame.*/
+#if defined(OC_BITRATE_STATS)
+ efrag->eerror=err[OC_MODE_INTER_NOMV][mapi];
+#endif
+ efrag->qii=(unsigned char)frag_qii[codedi][OC_INTER_FRAME][1];
+ frag->qi=qi;
+ }
+ intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+ inter_bits+=bits[OC_MODE_INTER_NOMV]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+ continue;
+ }
+ /*Otherwise, add this to the coded MB list.*/
+ _enc->state.coded_mbis[_enc->state.ncoded_mbis++]=mbi;
+ /*Compute the chroma MVs for the 4MV mode.*/
+ (*set_chroma_mvs)(bmvs[1],bmvs[0]);
+ /*Do a MV search against the golden frame.*/
+ oc_mcenc_search_1mv(_enc->mcenc,mb-_enc->state.mbs,OC_FRAME_GOLD);
+ /*We are now ready to do mode decision for this macro block.
+ Mode decision is done by exhaustively examining all potential choices.
+ Since we use a minimum-quality encoding strategy, this amounts to
+ simply selecting the mode which uses the smallest number of bits,
+ since the minimum quality will be met in any mode.
+ Obviously, doing the motion compensation, fDCT, tokenization, and then
+ counting the bits each token uses is computationally expensive.
+ Theora's EOB runs can also split the cost of these tokens across
+ multiple fragments, and naturally we don't know what the optimal
+ choice of Huffman codes will be until we know all the tokens we're
+ going to encode in all the fragments.
+
+ So we use a simple approach to estimating the bit cost of each mode
+ based upon the SAD value of the residual.
+ The mathematics behind the technique are outlined by Kim \cite{Kim03},
+ but the process is very simple.
+ For each quality index and SAD value, we have a table containing the
+ average number of bits needed to code a fragment.
+ The SAD values are placed into a small number of bins (currently 16).
+ The bit counts are obtained by examining actual encoded frames, with
+ optimal Huffman codes selected and EOB bits appropriately divided
+ among all the blocks they involve.
+ A separate QIxSAD table is kept for each mode and color plane.
+ It may be possible to combine many of these, but only experimentation
+ will tell which ones truly represent the same distribution.
+
+ @ARTICLE{Kim03,
+ author="Hyun Mun Kim",
+ title="Adaptive Rate Control Using Nonlinear Regression",
+ journal="IEEE Transactions on Circuits and Systems for Video
+ Technology",
+ volume=13,
+ number=5,
+ pages="432--439",
+ month="May",
+ year=2003
+ }*/
+ memset(bits,0,sizeof(bits));
+ mbintrabits=0;
+ /*Find the SAD values for each coded fragment for each possible mode.*/
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ pli=mapi>>2;
+ bi=mapi&3;
+ fragi=mb->map[pli][bi];
+ frag=_enc->state.frags+fragi;
+ efrag=_enc->frinfo+fragi;
+ err[OC_MODE_INTRA][mapi]=0;
+ for(ci=1;ci<64;ci++){
+ err[OC_MODE_INTRA][mapi]+=abs(efrag->dct_coeffs[ci]);
+ }
+ err[OC_MODE_INTER_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,0,0,pli,
+ OC_FRAME_PREV);
+ err[OC_MODE_INTER_MV][mapi]=oc_enc_frag_sad(_enc,frag,
+ mbinfo->mvs[0][OC_FRAME_PREV][0],mbinfo->mvs[0][OC_FRAME_PREV][1],
+ pli,OC_FRAME_PREV);
+ err[OC_MODE_INTER_MV_LAST][mapi]=oc_enc_frag_sad(_enc,frag,
+ last_mv[0][0],last_mv[0][1],pli,OC_FRAME_PREV);
+ err[OC_MODE_INTER_MV_LAST2][mapi]=oc_enc_frag_sad(_enc,frag,
+ last_mv[1][0],last_mv[1][1],pli,OC_FRAME_PREV);
+ err[OC_MODE_INTER_MV_FOUR][mapi]=oc_enc_frag_sad(_enc,frag,
+ bmvs[!!pli][bi][0],bmvs[!!pli][bi][1],pli,OC_FRAME_PREV);
+ err[OC_MODE_GOLDEN_NOMV][mapi]=oc_enc_frag_sad(_enc,frag,
+ 0,0,pli,OC_FRAME_GOLD);
+ err[OC_MODE_GOLDEN_MV][mapi]=oc_enc_frag_sad(_enc,frag,
+ mbinfo->mvs[0][OC_FRAME_GOLD][0],mbinfo->mvs[0][OC_FRAME_GOLD][1],
+ pli,OC_FRAME_GOLD);
+ /*Using these distortion values, estimate the number of bits needed to
+ code this fragment in each mode.*/
+ qi=_enc->qis[OC_INTRA_FRAME][frag_qii[codedi][OC_INTRA_FRAME][0]];
+ mbintrabits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+ OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
+ qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][0]];
+ bits[OC_MODE_INTRA]+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+ OC_MINI(err[OC_MODE_INTRA][mapi]>>8,15)];
+ qi=_enc->qis[OC_INTER_FRAME][frag_qii[codedi][OC_INTER_FRAME][1]];
+ for(modei=OC_MODE_INTRA+1;modei<OC_NMODES;modei++){
+ bits[modei]+=OC_RES_BITRATES[qi][pli][modei][
+ OC_MINI(err[modei][mapi]>>6,15)];
+ }
+ }
+ /*Bit costs are stored in the table with extra precision.
+ Round them down to whole bits here.*/
+ for(modei=0;modei<OC_NMODES;modei++){
+ bits[modei]=bits[modei]+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+ }
+ /*Estimate the cost of coding the label for each mode.
+ See comments at oc_mode_scheme_chooser_cost() for a description of the
+ method.*/
+ for(modei=0;modei<OC_NMODES;modei++){
+ bits[modei]+=oc_mode_scheme_chooser_cost(&_enc->mode_scheme_chooser,
+ modei);
+ }
+ /*Add the motion vector bits for each mode that requires them.*/
+ mbpmvbitsa=oc_mvbitsa(mbinfo->mvs[0][OC_FRAME_PREV][0],
+ mbinfo->mvs[0][OC_FRAME_PREV][1]);
+ mbgmvbitsa=oc_mvbitsa(mbinfo->mvs[1][OC_FRAME_GOLD][0],
+ mbinfo->mvs[0][OC_FRAME_GOLD][1]);
+ mb4mvbitsa=mb4mvbitsb=0;
+ for(codedi=0;codedi<ncoded_luma;codedi++){
+ mb4mvbitsa=oc_mvbitsa(bmvs[0][coded[codedi]][0],
+ bmvs[0][coded[codedi]][1]);
+ mb4mvbitsb+=12;
+ }
+ /*We use the same opportunity cost method of estimating the cost of
+ coding the motion vectors with the two different schemes as we do for
+ estimating the cost of the mode labels.
+ However, because there are only two schemes and they're both pretty
+ simple, this can just be done inline.*/
+ bits[OC_MODE_INTER_MV]+=OC_MINI(mvbitsa+mbpmvbitsa,mvbitsb+12)-
+ OC_MINI(mvbitsa,mvbitsb);
+ bits[OC_MODE_GOLDEN_MV]+=OC_MINI(mvbitsa+mbgmvbitsa,mvbitsb+12)-
+ OC_MINI(mvbitsa,mvbitsb);
+ bits[OC_MODE_INTER_MV_FOUR]+=OC_MINI(mvbitsa+mb4mvbitsa,
+ mvbitsb+mb4mvbitsb)-OC_MINI(mvbitsa,mvbitsb);
+ /*Finally, pick the mode with the cheapest estimated bit cost.*/
+ mb->mode=0;
+ for(modei=1;modei<OC_NMODES;modei++)if(bits[modei]<bits[mb->mode]){
+ /*Do not select 4MV mode when not all the luma blocks are coded when
+ we're in VP3 compatibility mode.*/
+ if(_enc->vp3_compatible&&modei==OC_MODE_INTER_MV_FOUR&&ncoded_luma<4){
+ continue;
+ }
+ mb->mode=modei;
+ }
+#if defined(OC_BITRATE_STATS)
+ /*Remember the error for the mode we selected in each fragment.*/
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ fragi=mb->map[mapi>>2][mapi&3];
+ efrag=_enc->frinfo+fragi;
+ efrag->eerror=err[mb->mode][mapi];
+ }
+#endif
+ /*Go back and store the selected qi index corresponding to the selected
+ mode in each fragment.*/
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ fragi=mb->map[mapi>>2][mapi&3];
+ frag=_enc->state.frags+fragi;
+ efrag=_enc->frinfo+fragi;
+ efrag->qii=(unsigned char)
+ frag_qii[codedi][OC_INTER_FRAME][mb->mode!=0];
+ frag->qi=_enc->qis[OC_INTER_FRAME][efrag->qii];
+ }
+ inter_bits+=bits[mb->mode];
+ intra_bits+=mbintrabits+(1<<OC_BIT_SCALE-1)>>OC_BIT_SCALE;
+ oc_mode_scheme_chooser_update(&_enc->mode_scheme_chooser,mb->mode);
+ switch(mb->mode){
+ case OC_MODE_INTER_MV:{
+ mvbitsa+=mbpmvbitsa;
+ mvbitsb+=12;
+ last_mv[1][0]=last_mv[0][0];
+ last_mv[1][1]=last_mv[0][1];
+ mbmv[0]=last_mv[0][0]=mbinfo->mvs[0][OC_FRAME_PREV][0];
+ mbmv[1]=last_mv[0][1]=mbinfo->mvs[0][OC_FRAME_PREV][1];
+ }break;
+ case OC_MODE_INTER_MV_LAST:{
+ mbmv[0]=last_mv[0][0];
+ mbmv[1]=last_mv[0][1];
+ }break;
+ case OC_MODE_INTER_MV_LAST2:{
+ mbmv[0]=last_mv[1][0];
+ mbmv[1]=last_mv[1][1];
+ last_mv[1][0]=last_mv[0][0];
+ last_mv[1][1]=last_mv[0][1];
+ last_mv[0][0]=mbmv[0];
+ last_mv[0][1]=mbmv[1];
+ }break;
+ case OC_MODE_INTER_MV_FOUR:{
+ mvbitsa+=mb4mvbitsa;
+ mvbitsb+=mb4mvbitsb;
+ if(ncoded_luma>0){
+ /*After 4MV mode, the last MV is the one from the last coded luma
+ block.*/
+ last_mv[1][0]=last_mv[0][0];
+ last_mv[1][1]=last_mv[0][1];
+ last_mv[0][0]=bmvs[0][coded[ncoded_luma-1]][0];
+ last_mv[0][1]=bmvs[0][coded[ncoded_luma-1]][1];
+ }
+ }break;
+ case OC_MODE_GOLDEN_MV:{
+ mvbitsa+=mbgmvbitsa;
+ mvbitsb+=12;
+ mbmv[0]=mbinfo->mvs[0][OC_FRAME_GOLD][0];
+ mbmv[1]=mbinfo->mvs[0][OC_FRAME_GOLD][1];
+ }break;
+ }
+ if(OC_MODE_HAS_MV[mb->mode]){
+ /*Special case 4MV mode.
+ MVs are stored in bmvs.*/
+ if(mb->mode==OC_MODE_INTER_MV_FOUR){
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ pli=mapi>>2;
+ bi=mapi&3;
+ fragi=mb->map[pli][bi];
+ frag=_enc->state.frags+fragi;
+ frag->mbmode=mb->mode;
+ frag->mv[0]=bmvs[!!pli][bi][0];
+ frag->mv[1]=bmvs[!!pli][bi][1];
+ }
+ }
+ /*For every other mode with a MV, it is stored in mbmv.*/
+ else{
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ fragi=mb->map[mapi>>2][mapi&3];
+ frag=_enc->state.frags+fragi;
+ frag->mbmode=mb->mode;
+ frag->mv[0]=mbmv[0];
+ frag->mv[1]=mbmv[1];
+ }
+ }
+ }
+ /*For modes with no MV, ensure 0,0 is stored in each fragment.*/
+ else{
+ for(codedi=0;codedi<ncoded;codedi++){
+ mapi=coded[codedi];
+ fragi=mb->map[mapi>>2][mapi&3];
+ frag=_enc->state.frags+fragi;
+ frag->mbmode=mb->mode;
+ frag->mv[0]=frag->mv[1]=0;
+ }
+ }
+ }
+ }
+ /*Finally, compare the cost of an INTER frame and an INTRA frame.*/
+ if(mvbitsb<mvbitsa){
+ _enc->mv_scheme=1;
+ inter_bits+=mvbitsb;
+ }
+ else{
+ _enc->mv_scheme=0;
+ inter_bits+=mvbitsa;
+ }
+ inter_bits+=_enc->mode_scheme_chooser.scheme_bits[
+ _enc->mode_scheme_chooser.scheme_list[0]];
+ /*The easiest way to count the bits needed for coded/not coded fragments is
+ to code them.
+ We need to do this anyway, might as well do it now.*/
+ oggpackB_reset(&_enc->opb_coded_flags);
+ inter_bits+=oc_enc_partial_sb_flags_pack(_enc,&_enc->opb_coded_flags);
+ inter_bits+=oc_enc_coded_sb_flags_pack(_enc,&_enc->opb_coded_flags);
+ inter_bits+=oc_enc_coded_block_flags_pack(_enc,&_enc->opb_coded_flags);
+ /*Select the quantizer list for INTER frames.*/
+ _enc->state.nqis=_enc->nqis[OC_INTER_FRAME];
+ for(qii=0;qii<_enc->state.nqis;qii++){
+ _enc->state.qis[qii]=_enc->qis[OC_INTER_FRAME][qii];
+ }
+ if(intra_bits>inter_bits){
+ _enc->vbr->est_bits=inter_bits;
+ return OC_INTER_FRAME;
+ }
+ /*All INTRA mode is smaller, but we haven't counted up the cost of all the
+ not coded fragments we will now have to code.*/
+ uncoded_fragi_end=uncoded_fragi=_enc->state.uncoded_fragis;
+ for(pli=0;pli<3;pli++){
+ uncoded_fragi_end-=_enc->state.nuncoded_fragis[pli];
+ while(uncoded_fragi-->uncoded_fragi_end){
+ fragi=*uncoded_fragi;
+ frag=_enc->state.frags+fragi;
+ /*Assume a very small bit cost for invalid fragments.*/
+ if(frag->invalid)intra_bits+=OC_RES_BITRATES[0][pli][OC_MODE_INTRA][0];
+ else{
+ int eerror;
+ eerror=0;
+ efrag=_enc->frinfo+fragi;
+ for(ci=1;ci<64;ci++)eerror+=abs(efrag->dct_coeffs[ci]);
+#if defined(OC_BITRATE_STATS)
+ efrag->eerror=eerror;
+#endif
+ qi=_enc->qis[OC_INTRA_FRAME][0];
+ for(qii=1;qii<_enc->nqis[OC_INTRA_FRAME];qii++){
+ if(_enc->qis[OC_INTRA_FRAME][qii]<qi&&
+ efrag->qi_min[0]<=_enc->qis[OC_INTRA_FRAME][qii]){
+ qi=_enc->qis[OC_INTRA_FRAME][qii];
+ }
+ }
+ intra_bits+=OC_RES_BITRATES[qi][pli][OC_MODE_INTRA][
+ OC_MINI(eerror>>8,15)];
+ /*If it turns out INTRA mode was more expensive, we're done.*/
+ if(intra_bits>inter_bits){
+ _enc->vbr->est_bits=inter_bits;
+ return OC_INTER_FRAME;
+ }
+ }
+ }
+ }
+ /*So, we've compared the full cost estimates, and INTRA is still better.
+ Code an INTRA frame instead.*/
+ oc_enc_vbr_mark_all_intra(_enc);
+ _enc->vbr->est_bits=intra_bits;
+ return OC_INTRA_FRAME;
+}
+
+/*A pipeline stage for transforming, quantizing, and tokenizing the frame.*/
+
+/*Begins a new frame for the VBR stage by clearing the per-plane progress
+   counters.
+  _stage: The VBR pipeline stage.
+  Return: Always 0.*/
+static int oc_vbr_pipe_start(oc_enc_pipe_stage *_stage){
+  int plane;
+  plane=3;
+  while(plane-->0)_stage->y_procd[plane]=0;
+  return 0;
+}
+
+/*Notes how much of each plane has been made available.
+  No encoding work is done here; this stage only copies the counts.
+  _stage:   The VBR pipeline stage.
+  _y_avail: The amount of data available in each of the three planes.
+  Return: Always 0.*/
+static int oc_vbr_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  _stage->y_procd[0]=_y_avail[0];
+  _stage->y_procd[1]=_y_avail[1];
+  _stage->y_procd[2]=_y_avail[2];
+  return 0;
+}
+
+/*Finishes the VBR stage for the current frame.
+ This picks the frame type, selects quantizers, runs mode decision for
+ candidate INTER frames, and then calls oc_enc_vbr_quant_dc() and
+ oc_enc_vbr_residual_tokenize() when there are coded fragments.
+ The next stage, if any, is driven through its start, process and end
+ callbacks from here, since this stage does not stream rows downstream.
+ _stage: The VBR pipeline stage.
+ Return: 0 if there is no next stage.
+ Otherwise a negative value if the next stage's start or process
+ callback failed, or else the return value of its end callback.*/
+static int oc_vbr_pipe_end(oc_enc_pipe_stage *_stage){
+ oc_enc_ctx *enc;
+ int ret;
+ enc=_stage->enc;
+ /*Force an INTRA frame for the very first frame, or once
+ keyframe_frequency_force frames have passed since the last key frame.*/
+ if(enc->state.curframe_num==0||
+ enc->state.curframe_num-enc->state.keyframe_num>=
+ enc->keyframe_frequency_force){
+ enc->state.frame_type=OC_INTRA_FRAME;
+ oc_enc_vbr_quant_sel_quality(enc,1);
+ oc_enc_vbr_mark_all_intra(enc);
+ }
+ else{
+ oc_enc_mark_coded(enc);
+ /*Only proceed if we have some coded blocks.
+ No coded blocks -> dropped frame -> 0 byte packet.*/
+ if(enc->ncoded_frags>0){
+ oc_enc_vbr_quant_sel_quality(enc,0);
+ /*Mode decision may still decide that an INTRA frame is cheaper.*/
+ enc->state.frame_type=oc_enc_choose_mbmodes(enc);
+ if(enc->state.frame_type==OC_INTER_FRAME)oc_enc_do_inter_dcts(enc);
+ }
+ }
+ /*Only initialize subsequent stages after we know how many fragments will be
+ encoded, and at what quality (so the loop filter can be set up
+ properly).*/
+ if(_stage->next!=NULL){
+ ret=(*_stage->next->pipe_start)(_stage->next);
+ if(ret<0)return ret;
+ }
+ if(enc->ncoded_frags>0){
+ /*TODO: These stages could be pipelined with reconstruction.*/
+ oc_enc_vbr_quant_dc(enc);
+ oc_enc_vbr_residual_tokenize(enc);
+ }
+ /*Hand everything recorded in y_procd to the next stage in one call, then
+ finish it.*/
+ if(_stage->next!=NULL){
+ ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ if(ret<0)return ret;
+ return (*_stage->next->pipe_end)(_stage->next);
+ }
+ return 0;
+}
+
+/*Prepares the pipeline stage that handles transform, quantization, and
+   tokenization.
+  The stage starts out with no successor; one can be attached through its
+   next field.
+  _stage: The stage structure to fill in.
+  _enc:   The encoding context the stage operates on.*/
+static void oc_vbr_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->pipe_start=oc_vbr_pipe_start;
+  _stage->pipe_proc=oc_vbr_pipe_process;
+  _stage->pipe_end=oc_vbr_pipe_end;
+  _stage->next=NULL;
+  _stage->enc=_enc;
+}
+
+
+/*Fills in a freshly allocated VBR context with default settings.
+  The target qi is copied from the encoder's quality setting, and the kf_*
+   and df_* quantizer index limits are opened up to the full range [0,63].
+  The importance map and psychovisual contexts are allocated here and are
+   released again by oc_enc_vbr_clear().
+  _vbr: The VBR context to initialize.
+  _enc: The encoding context it belongs to.
+  Return: Always 0.*/
+static int oc_enc_vbr_init(oc_enc_vbr_ctx *_vbr,oc_enc_ctx *_enc){
+  _vbr->enc=_enc;
+  /*Default configuration.*/
+  _vbr->cfg.qi=_enc->state.info.quality;
+  _vbr->cfg.df_qi_min=0;
+  _vbr->cfg.kf_qi_min=0;
+  _vbr->cfg.df_qi_max=63;
+  _vbr->cfg.kf_qi_max=63;
+  /*Helper contexts and our own pipeline stage.*/
+  _vbr->impmap=oc_impmap_alloc(_enc);
+  _vbr->psych=oc_psych_alloc(_enc);
+  oc_vbr_pipe_init(&_vbr->pipe,_enc);
+  return 0;
+}
+
+/*Releases the resources owned by a VBR context, but not the context itself.
+ The psychovisual and importance map contexts are freed in the reverse of
+ the order in which oc_enc_vbr_init() allocates them.
+ _vbr: The VBR context to clear.*/
+static void oc_enc_vbr_clear(oc_enc_vbr_ctx *_vbr){
+ oc_psych_free(_vbr->psych);
+ oc_impmap_free(_vbr->impmap);
+}
+
+/*Validates a VBR configuration and, if it is acceptable, installs it.
+  A configuration is accepted when qi lies in [0,63] and both the kf_* and
+   df_* limits satisfy 0<=min<=max<=63.
+  _vbr: The VBR context to configure.
+  _cfg: The requested configuration; it is copied, not referenced.
+  Return: 0 on success, or -OC_EINVAL if any value is out of range, in which
+   case the current configuration is left untouched.*/
+static int oc_enc_vbr_cfg(oc_enc_vbr_ctx *_vbr,theora_vbr_cfg *_cfg){
+  /*The target quantizer index.*/
+  if(_cfg->qi<0||_cfg->qi>63)return -OC_EINVAL;
+  /*The kf_* limits.*/
+  if(_cfg->kf_qi_min<0||_cfg->kf_qi_min>63)return -OC_EINVAL;
+  if(_cfg->kf_qi_max<_cfg->kf_qi_min||_cfg->kf_qi_max>63)return -OC_EINVAL;
+  /*The df_* limits.*/
+  if(_cfg->df_qi_min<0||_cfg->df_qi_min>63)return -OC_EINVAL;
+  if(_cfg->df_qi_max<_cfg->df_qi_min||_cfg->df_qi_max>63)return -OC_EINVAL;
+  memcpy(&_vbr->cfg,_cfg,sizeof(_vbr->cfg));
+  return 0;
+}
+
+/*Wires the encoder's pipeline stages together for VBR encoding.
+ The links set up directly here are fill_pipe->pack_pipe,
+ the VBR stage->copy_pipe, and fdct_pipe->(psych stage or VBR stage).
+ The importance map stage is then placed in front of fdct_pipe, and the
+ motion compensation stage in front of that.
+ NOTE(review): This assumes oc_psych_prepend_to_pipe() and
+ oc_mcenc_prepend_to_pipe() return a stage that runs before the stage passed
+ in, as oc_impmap_prepend_to_pipe() does; confirm against their definitions.
+ _vbr: The VBR context whose stages are being linked.
+ Return: The stage returned by oc_mcenc_prepend_to_pipe(), which the caller
+ installs as the start of the pipeline.*/
+static oc_enc_pipe_stage *oc_enc_vbr_create_pipe(oc_enc_vbr_ctx *_vbr){
+ oc_enc_pipe_stage *pipe;
+ _vbr->enc->fill_pipe.next=&_vbr->enc->pack_pipe;
+ _vbr->pipe.next=&_vbr->enc->copy_pipe;
+ /*TODO: Disable spatial masking and CSF filtering based on
+ application-specified speed level.*/
+ pipe=oc_psych_prepend_to_pipe(_vbr->psych,&_vbr->pipe);
+ _vbr->enc->fdct_pipe.next=pipe;
+ /*TODO: Disable impmap based on application-specified speed level.*/
+ pipe=oc_impmap_prepend_to_pipe(_vbr->impmap,&_vbr->enc->fdct_pipe);
+ pipe=oc_mcenc_prepend_to_pipe(_vbr->enc->mcenc,pipe);
+ return pipe;
+}
+
+
+/*Allocates and initializes a VBR encoding context.
+  _enc: The encoding context the new VBR context is attached to.
+  Return: The new context, or NULL if it could not be allocated.
+   Release it with oc_enc_vbr_free().*/
+oc_enc_vbr_ctx *oc_enc_vbr_alloc(oc_enc_ctx *_enc){
+  oc_enc_vbr_ctx *vbr;
+  vbr=(oc_enc_vbr_ctx *)_ogg_malloc(sizeof(*vbr));
+  /*oc_enc_vbr_init() writes through the pointer unconditionally, so an
+     allocation failure must be caught before calling it.*/
+  if(vbr==NULL)return NULL;
+  oc_enc_vbr_init(vbr,_enc);
+  return vbr;
+}
+
+/*Destroys a VBR context created by oc_enc_vbr_alloc().
+  _vbr: The context to destroy.
+        NULL is accepted and ignored.*/
+void oc_enc_vbr_free(oc_enc_vbr_ctx *_vbr){
+  if(_vbr==NULL)return;
+  oc_enc_vbr_clear(_vbr);
+  _ogg_free(_vbr);
+}
+
+/*Switches the encoder over to VBR mode.
+  _vbr: The VBR context.
+  _cfg: An optional new configuration.
+        If NULL, the configuration already stored in _vbr is used.
+  Return: 0 on success, or the negative value returned by oc_enc_vbr_cfg()
+   if it rejects _cfg, in which case nothing else is changed.*/
+int oc_enc_vbr_enable(oc_enc_vbr_ctx *_vbr,theora_vbr_cfg *_cfg){
+  oc_enc_ctx *enc;
+  if(_cfg!=NULL){
+    int err;
+    err=oc_enc_vbr_cfg(_vbr,_cfg);
+    if(err<0)return err;
+  }
+  /*Map the qi to a multiple of JND values.*/
+  if(_vbr->cfg.qi>=63)_vbr->qscale=0.5F;
+  else _vbr->qscale=1.5F*OC_POWF(2,0.0625F*(64-_vbr->cfg.qi));
+  enc=_vbr->enc;
+  enc->pipe=oc_enc_vbr_create_pipe(_vbr);
+  /*TODO: Implement a real speed level.*/
+  enc->speed_max=0;
+  enc->set_speed=oc_enc_set_speed_null;
+  return 0;
+}
Added: experimental/derf/theora-exp/lib/encvbr.h
===================================================================
--- experimental/derf/theora-exp/lib/encvbr.h 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/encvbr.h 2005-09-18 00:58:06 UTC (rev 10030)
@@ -0,0 +1,43 @@
+/*Internal declarations for the VBR encoding mode: the VBR context structure
+ and the interfaces of the importance map and psychovisual helpers it
+ owns.*/
+#if !defined(_encvbr_H)
+# define _encvbr_H (1)
+# include "encint.h"
+
+
+
+/*These contexts are opaque here; only pointers to them are used.*/
+typedef struct oc_impmap_ctx oc_impmap_ctx;
+typedef struct oc_psych_ctx oc_psych_ctx;
+
+
+
+/*Context information for the VBR encoder.*/
+struct oc_enc_vbr_ctx{
+ /*Configuration information.*/
+ theora_vbr_cfg cfg;
+ /*The main VBR encoder's pipe stage.*/
+ oc_enc_pipe_stage pipe;
+ /*The scale factor for the current quality setting.*/
+ float qscale;
+ /*Minimum psychovisual tolerance for the DC coefficients in each plane.*/
+ unsigned dc_tol_mins[3];
+ /*The estimated bit cost of the current frame.*/
+ int est_bits;
+ /*The encode context.*/
+ oc_enc_ctx *enc;
+ /*Context information used to generate the importance map.*/
+ oc_impmap_ctx *impmap;
+ /*Context information used to generate low-level perceptual weightings.*/
+ oc_psych_ctx *psych;
+};
+
+
+/*Importance map interface.
+ oc_impmap_alloc() creates a context for the given encoder.
+ oc_impmap_free() destroys one.
+ oc_impmap_prepend_to_pipe() returns the importance map's pipeline stage
+ for placement in front of _next.*/
+oc_impmap_ctx *oc_impmap_alloc(oc_enc_ctx *_enc);
+void oc_impmap_free(oc_impmap_ctx *_impmap);
+oc_enc_pipe_stage *oc_impmap_prepend_to_pipe(oc_impmap_ctx *_impmap,
+ oc_enc_pipe_stage *_next);
+
+/*Psychovisual model interface.
+ NOTE(review): Presumably these mirror the oc_impmap_* functions above;
+ confirm against their definitions.*/
+oc_psych_ctx *oc_psych_alloc(oc_enc_ctx *_enc);
+void oc_psych_free(oc_psych_ctx *_psych);
+oc_enc_pipe_stage *oc_psych_prepend_to_pipe(oc_psych_ctx *_psych,
+ oc_enc_pipe_stage *_next);
+
+#endif
Modified: experimental/derf/theora-exp/lib/fdct.c
===================================================================
--- experimental/derf/theora-exp/lib/fdct.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/fdct.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -316,3 +316,131 @@
if(cext==NULL)for(in=w,out=_y,end=out+8;out<end;in+=8,out++)fdct8(out,in);
else for(in=w,out=_y,end=out+8;out<end;in+=8,out++)fdct8_ext(out,in,cext);
}
+
+/*Performs an fDCT on a given fragment.
+ The 8x8 block of pixels is copied into a temporary buffer with 128
+ subtracted from each value, and then transformed.
+ Fragments with border information are transformed with
+ oc_fdct8x8_border(), all others with oc_fdct8x8().
+ _frag: The fragment to perform the 2D DCT on.
+ _dct_vals: The output buffer for the DCT coefficients.
+ _ystride: The Y stride of the plane the fragment belongs to.
+ _framei: The picture buffer index to perform the DCT on.
+ Use OC_FRAME_IO for the current input frame.*/
+void oc_frag_intra_fdct(const oc_fragment *_frag,ogg_int16_t _dct_vals[64],
+ int _ystride,int _framei){
+ ogg_int16_t pix_buf[64];
+ unsigned char *pixels;
+ int pixi;
+ int y;
+ int x;
+ /*NOTE: 128 is subtracted from each pixel value to make it signed.
+ The original VP3 source claimed that, "this reduces the internal precision
+ requirments [sic] in the DCT transform."
+ This is of course not actually true.
+ The transform must still support input in the range [-255,255] to code
+ predicted fragments, since the same transform is used for both.
+ This actually _reduces_ the precision of the results, because larger
+ (absolute) values would have fewer significant bits chopped off when
+ rounding.
+ We're stuck with it, however.
+ At least it might reduce bias towards 0 when coding unpredicted DC
+ coefficients, but that's not what VP3 justified it with.*/
+ pixels=_frag->buffer[_framei];
+ /*For border fragments, only copy pixels that are in the displayable
+ region of the image.
+ The DCT function will compute optimal padding values for the other
+ pixels.*/
+ if(_frag->border!=NULL){
+ ogg_int64_t mask;
+ /*One bit per pixel, consumed in raster order starting from the least
+ significant bit; a set bit means the pixel is copied.*/
+ mask=_frag->border->mask;
+ for(pixi=y=0;y<8;y++){
+ for(x=0;x<8;x++,pixi++){
+ pix_buf[pixi]=(ogg_int16_t)(((int)mask&1)?pixels[x]-128:0);
+ /*This branchless code is (almost) equivalent to the previous line:
+ int pmask;
+ pmask=-((int)mask&1);
+ pix_buf[pixi]=(ogg_int16_t)(pmask&(pixels[x]-128));
+ The only difference is that it reads pixels[x] unconditionally.
+ We don't use this code to allow the user to pass in a buffer that is
+ the exact size of the displayed image, not the size padded to a
+ multiple of 16.
+ In the latter case, we might segfault on pixels[x] if it is not
+ mapped to a valid page, even though we would discard the value
+ we were attempting to read.*/
+ mask>>=1;
+ }
+ pixels+=_ystride;
+ }
+ oc_fdct8x8_border(_frag->border,_dct_vals,pix_buf);
+ }
+ /*Otherwise, copy all the pixels in the fragment and do a normal DCT.*/
+ else{
+ for(pixi=y=0;y<8;y++){
+ for(x=0;x<8;x++,pixi++)pix_buf[pixi]=(ogg_int16_t)(pixels[x]-128);
+ pixels+=_ystride;
+ }
+ oc_fdct8x8(_dct_vals,pix_buf);
+ }
+}
+
+/*A pipeline stage for applying an fDCT to each (non-motion compensated) block
+ in a frame.*/
+
+/*Begins a new frame for the fDCT stage.
+  The per-plane progress counters are cleared and the start call is passed
+   on to the next stage, if there is one.
+  _stage: The fDCT pipeline stage.
+  Return: The return value of the next stage's start callback, or 0 if there
+   is no next stage.*/
+static int oc_fdct_pipe_start(oc_enc_pipe_stage *_stage){
+  oc_enc_pipe_stage *next;
+  int                plane;
+  for(plane=0;plane<3;plane++)_stage->y_procd[plane]=0;
+  next=_stage->next;
+  if(next==NULL)return 0;
+  return (*next->pipe_start)(next);
+}
+
+/*Does the fDCTs.
+ This pipeline stage proceeds in a planar fashion.
+ For each plane, every complete fragment row that has become available
+ since the last call is transformed, and the next stage (if any) is
+ notified after each such row.
+ _stage: The fDCT pipeline stage.
+ _y_avail: The number of rows available in each plane.
+ Return: 0 on success, or the first negative value returned by the next
+ stage's process callback.*/
+static int oc_fdct_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+ int pli;
+ for(pli=0;pli<3;pli++){
+ int y_procd;
+ int y_avail;
+ /*Compute how far we can get in complete fragment rows.*/
+ y_procd=_stage->y_procd[pli];
+ y_avail=_y_avail[pli]&~7;
+ /*If that's farther than we've already gotten, do some fDCTs.*/
+ if(y_avail>y_procd){
+ oc_fragment_plane *fplane;
+ oc_fragment *frags;
+ oc_fragment *frag_end;
+ oc_fragment_enc_info *efrags;
+ int ystride;
+ int yfrag0;
+ int yrows;
+ fplane=_stage->enc->state.fplanes+pli;
+ ystride=_stage->enc->state.input[pli].ystride;
+ /*Index of the first fragment in the first unprocessed row.*/
+ yfrag0=fplane->froffset+(y_procd>>3)*fplane->nhfrags;
+ /*The number of new fragment rows.
+ This is at least 1 here as long as y_procd is a multiple of 8, which
+ the do-while loop below relies on.*/
+ yrows=y_avail-y_procd>>3;
+ frags=_stage->enc->state.frags+yfrag0;
+ efrags=_stage->enc->frinfo+yfrag0;
+ do{
+ /*Transform one row; frags and efrags are left at the start of the
+ next row.*/
+ for(frag_end=frags+fplane->nhfrags;frags<frag_end;frags++,efrags++){
+ oc_frag_intra_fdct(frags,efrags->dct_coeffs,ystride,OC_FRAME_IO);
+ }
+ _stage->y_procd[pli]+=8;
+ if(_stage->next!=NULL){
+ int ret;
+ ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ if(ret<0)return ret;
+ }
+ }
+ while(--yrows);
+ }
+ }
+ return 0;
+}
+
+/*Finishes the current frame for the fDCT stage.
+  There is nothing to flush here, so the call is simply passed on.
+  _stage: The fDCT pipeline stage.
+  Return: The return value of the next stage's end callback, or 0 if there
+   is no next stage.*/
+static int oc_fdct_pipe_end(oc_enc_pipe_stage *_stage){
+  if(_stage->next==NULL)return 0;
+  return (*_stage->next->pipe_end)(_stage->next);
+}
+
+
+/*Prepares the fDCT pipeline stage for use.
+  The stage starts out with no successor; one can be attached through its
+   next field.
+  _stage: The stage structure to fill in.
+  _enc:   The encoding context the stage operates on.*/
+void oc_fdct_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->pipe_start=oc_fdct_pipe_start;
+  _stage->pipe_proc=oc_fdct_pipe_process;
+  _stage->pipe_end=oc_fdct_pipe_end;
+  _stage->next=NULL;
+  _stage->enc=_enc;
+}
Modified: experimental/derf/theora-exp/lib/fdct.h
===================================================================
--- experimental/derf/theora-exp/lib/fdct.h 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/fdct.h 2005-09-18 00:58:06 UTC (rev 10030)
@@ -6,5 +6,8 @@
void oc_fdct8x8(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
void oc_fdct8x8_border(const oc_border_info *_border,ogg_int16_t _y[64],
ogg_int16_t _x[64]);
+void oc_frag_intra_fdct(const oc_fragment *_frag,ogg_int16_t _dct_vals[64],
+ int _ystride,int _framei);
+void oc_fdct_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc);
#endif
Modified: experimental/derf/theora-exp/lib/huffdec.c
===================================================================
--- experimental/derf/theora-exp/lib/huffdec.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/huffdec.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -84,7 +84,7 @@
if(!bits){
int ret;
binode=oc_huff_node_alloc(1);
- binode->depth=_depth>1;
+ binode->depth=(unsigned char)(_depth>1);
ret=oc_huff_tree_unpack(_opb,binode->nodes,_depth);
if(ret>=0)ret=oc_huff_tree_unpack(_opb,binode->nodes+1,_depth);
if(ret<0){
@@ -96,7 +96,7 @@
else{
if(theora_read(_opb,OC_NDCT_TOKEN_BITS,&bits)<0)return OC_BADHEADER;
binode=oc_huff_node_alloc(0);
- binode->depth=_depth>1;
+ binode->depth=(unsigned char)(_depth>1);
binode->token=(unsigned char)bits;
}
*_binode=binode;
@@ -112,7 +112,6 @@
static int oc_huff_tree_mindepth(oc_huff_node *_binode){
int depth0;
int depth1;
- int cdepth;
if(_binode->nbits==0)return 0;
depth0=oc_huff_tree_mindepth(_binode->nodes[0]);
depth1=oc_huff_tree_mindepth(_binode->nodes[1]);
@@ -127,9 +126,6 @@
Return: The number of entries that would be contained in a jump table of the
given depth.*/
static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
- int depth0;
- int depth1;
- int cdepth;
if(_binode->nbits==0||_depth<=0)return 1;
else{
return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+
@@ -156,7 +152,7 @@
oc_huff_node *_binode,int _level,int _depth){
if(_level<=0||_binode->nbits==0){
int i;
- _binode->depth=_depth-_level;
+ _binode->depth=(unsigned char)(_depth-_level);
_nodes[0]=oc_huff_tree_collapse(_binode);
for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
}
@@ -176,13 +172,10 @@
Return: The new root of the collapsed sub-tree.*/
static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode){
oc_huff_node *root;
- int nchildren;
int mindepth;
int depth;
int loccupancy;
int occupancy;
- int i;
- int inext;
depth=mindepth=oc_huff_tree_mindepth(_binode);
occupancy=1<<mindepth;
do{
Modified: experimental/derf/theora-exp/lib/impmap.c
===================================================================
--- experimental/derf/theora-exp/lib/impmap.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/impmap.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -343,6 +343,8 @@
/*Importance map context information.*/
struct oc_impmap_ctx{
+ /*The pipeline stage.*/
+ oc_enc_pipe_stage pipe;
/*Segmentation information for the Y plane.
Some day we may also wish to segment the chroma planes, or derive a
segmentation using all 3 planes at once.*/
@@ -2106,42 +2108,7 @@
}
return ret;
}
-
-oc_impmap_ctx *oc_impmap_alloc(oc_enc_ctx *_enc){
- theora_info *info;
- oc_impmap_ctx *impmap;
- int edge_sz;
- int width;
- int height;
- info=&_enc->state.info;
- width=info->pic_width;
- height=info->pic_height;
- impmap=(oc_impmap_ctx *)_ogg_malloc(sizeof(*impmap));
- oc_seg_init(&impmap->seg,width,height);
- impmap->inv_region_sz_max=100.0F/(width*height);
- edge_sz=width>1?height>1?(width-2<<1)+(height<<1):width:height;
- impmap->inv_edge_sz_max=2.0F/edge_sz;
- impmap->pic_x=info->pic_x;
- impmap->pic_y=info->pic_y;
- impmap->imp_avg=0.5F;
- /*Allocate space for the region stats and neighbor links.*/
- impmap->regions=(oc_impmap_region *)_ogg_malloc(
- impmap->seg.cregions*sizeof(impmap->regions[0]));
- impmap->enc=_enc;
- impmap->chroma_frag_weight=
- OC_IMPMAP_CHROMA_FRAG_WEIGHT_TABLE[_enc->state.info.pixel_fmt];
- return impmap;
-}
-
-void oc_impmap_free(oc_impmap_ctx *_impmap){
- if(_impmap!=NULL){
- oc_seg_clear(&_impmap->seg);
- _ogg_free(_impmap->regions);
- _ogg_free(_impmap);
- }
-}
-
-void oc_impmap_fill(oc_impmap_ctx *_impmap,float _duration){
+static void oc_impmap_fill(oc_impmap_ctx *_impmap,float _duration){
theora_img_plane yplane;
float imp_sum;
int img_offset;
@@ -2229,3 +2196,92 @@
}
#endif
}
+
+
+/*The importance map pipeline stage.
+ For now, for simplicity, this is not actually pipelined.
+ The quadtree segmentation algorithm does not really lend itself to it, and
+ even if an online segmentation algorithm were used, a full stall would be
+ created by the need to gather statistics over all the regions to assign
+ weights to any of them.*/
+
+/*Begins a new frame for the importance map stage by clearing the per-plane
+   progress counters.
+  _stage: The importance map pipeline stage.
+  Return: Always 0.*/
+static int oc_impmap_pipe_start(oc_enc_pipe_stage *_stage){
+  _stage->y_procd[0]=0;
+  _stage->y_procd[1]=0;
+  _stage->y_procd[2]=0;
+  return 0;
+}
+
+/*Notes how much of each plane has been made available.
+  Nothing is computed here; this stage only copies the counts.
+  _stage:   The importance map pipeline stage.
+  _y_avail: The amount of data available in each of the three planes.
+  Return: Always 0.*/
+static int oc_impmap_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+  int plane;
+  plane=0;
+  while(plane<3){
+    _stage->y_procd[plane]=_y_avail[plane];
+    plane++;
+  }
+  return 0;
+}
+
+/*Finishes the importance map stage for the current frame.
+  The importance map is filled in one go, using the duration of a single
+   frame, and then the next stage (if any) is driven through its start,
+   process and end callbacks.
+  _stage: The importance map pipeline stage.
+  Return: 0 if there is no next stage.
+          Otherwise a negative value if the next stage's start or process
+           callback failed, or else the return value of its end callback.*/
+static int oc_impmap_pipe_end(oc_enc_pipe_stage *_stage){
+  oc_enc_pipe_stage *next;
+  oc_enc_ctx        *enc;
+  float              duration;
+  int                ret;
+  enc=_stage->enc;
+  duration=enc->state.info.fps_denominator/
+   (float)enc->state.info.fps_numerator;
+  oc_impmap_fill(enc->vbr->impmap,duration);
+  next=_stage->next;
+  if(next==NULL)return 0;
+  ret=(*next->pipe_start)(next);
+  if(ret<0)return ret;
+  ret=(*next->pipe_proc)(next,_stage->y_procd);
+  if(ret<0)return ret;
+  return (*next->pipe_end)(next);
+}
+
+/*Prepares the importance map pipeline stage for use.
+  The stage starts out with no successor; oc_impmap_prepend_to_pipe()
+   attaches one.
+  _stage: The stage structure to fill in.
+  _enc:   The encoding context the stage operates on.*/
+static void oc_impmap_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+  _stage->pipe_start=oc_impmap_pipe_start;
+  _stage->pipe_proc=oc_impmap_pipe_process;
+  _stage->pipe_end=oc_impmap_pipe_end;
+  _stage->next=NULL;
+  _stage->enc=_enc;
+}
+
+
+/*Allocates and initializes an importance map context.
+ The segmentation, normalization factors and region storage are all sized
+ from the picture dimensions in the encoder's theora_info.
+ NOTE(review): Neither allocation below is checked for failure, and a
+ picture with zero area would lead to a division by zero; confirm that
+ callers rule these out.
+ _enc: The encoding context.
+ Return: The new context.
+ Release it with oc_impmap_free().*/
+oc_impmap_ctx *oc_impmap_alloc(oc_enc_ctx *_enc){
+ theora_info *info;
+ oc_impmap_ctx *impmap;
+ int edge_sz;
+ int width;
+ int height;
+ info=&_enc->state.info;
+ width=info->pic_width;
+ height=info->pic_height;
+ impmap=(oc_impmap_ctx *)_ogg_malloc(sizeof(*impmap));
+ oc_seg_init(&impmap->seg,width,height);
+ impmap->inv_region_sz_max=100.0F/(width*height);
+ /*The number of pixels on the picture boundary: 2*(width-2)+2*height in
+ general, or just the length of the picture when it is a single row or
+ column.*/
+ edge_sz=width>1?height>1?(width-2<<1)+(height<<1):width:height;
+ impmap->inv_edge_sz_max=2.0F/edge_sz;
+ impmap->pic_x=info->pic_x;
+ impmap->pic_y=info->pic_y;
+ impmap->imp_avg=0.5F;
+ /*Allocate space for the region stats and neighbor links.*/
+ impmap->regions=(oc_impmap_region *)_ogg_malloc(
+ impmap->seg.cregions*sizeof(impmap->regions[0]));
+ impmap->enc=_enc;
+ impmap->chroma_frag_weight=
+ OC_IMPMAP_CHROMA_FRAG_WEIGHT_TABLE[_enc->state.info.pixel_fmt];
+ /*Set up the pipeline stage embedded in the context.*/
+ oc_impmap_pipe_init(&impmap->pipe,_enc);
+ return impmap;
+}
+
+/*Destroys an importance map context created by oc_impmap_alloc().
+  _impmap: The context to destroy.
+           NULL is accepted and ignored.*/
+void oc_impmap_free(oc_impmap_ctx *_impmap){
+  if(_impmap==NULL)return;
+  oc_seg_clear(&_impmap->seg);
+  _ogg_free(_impmap->regions);
+  _ogg_free(_impmap);
+}
+
+
+/*Places the importance map stage in front of an existing pipeline stage.
+ _impmap: The importance map context whose stage is being inserted.
+ _next: The stage that should follow the importance map stage.
+ Return: The importance map's own pipeline stage, which now precedes
+ _next.*/
+oc_enc_pipe_stage *oc_impmap_prepend_to_pipe(oc_impmap_ctx *_impmap,
+ oc_enc_pipe_stage *_next){
+ _impmap->pipe.next=_next;
+ return &_impmap->pipe;
+}
Modified: experimental/derf/theora-exp/lib/mcenc.c
===================================================================
--- experimental/derf/theora-exp/lib/mcenc.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/mcenc.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -4,12 +4,13 @@
#include "encint.h"
struct oc_mcenc_ctx{
- oc_enc_ctx *enc;
- int candidates[12][2];
- int setb0;
- int ncandidates;
- ogg_int32_t mvapw1;
- ogg_int32_t mvapw2;
+ oc_enc_ctx *enc;
+ oc_enc_pipe_stage pipe;
+ int candidates[12][2];
+ int setb0;
+ int ncandidates;
+ ogg_int32_t mvapw1[2];
+ ogg_int32_t mvapw2[2];
};
/*The maximum Y plane SAD value for accepting the median predictor.*/
@@ -68,6 +69,8 @@
int _which_frame){
oc_mb_enc_info *nemb;
oc_mb_enc_info *emb;
+ ogg_int32_t mvapw1;
+ ogg_int32_t mvapw2;
int a[3][2];
int ncandidates;
int i;
@@ -116,15 +119,17 @@
/*Fill in set B: accelerated predictors for this and adjacent macro
blocks.*/
_mcenc->setb0=ncandidates;
+ mvapw1=_mcenc->mvapw1[_which_frame];
+ mvapw2=_mcenc->mvapw2[_which_frame];
/*The first time through the loop use the current macro block.*/
nemb=emb;
for(i=0;;i++){
_mcenc->candidates[ncandidates][0]=
- OC_DIV_ROUND_POW2(nemb->mvs[1][_which_frame][0]*_mcenc->mvapw1-
- nemb->mvs[2][_which_frame][0]*_mcenc->mvapw2,16,0x8000);
+ OC_DIV_ROUND_POW2(nemb->mvs[1][_which_frame][0]*mvapw1-
+ nemb->mvs[2][_which_frame][0]*mvapw2,16,0x8000);
_mcenc->candidates[ncandidates][1]=
- OC_DIV_ROUND_POW2(nemb->mvs[1][_which_frame][1]*_mcenc->mvapw1-
- nemb->mvs[2][_which_frame][1]*_mcenc->mvapw2,16,0x8000);
+ OC_DIV_ROUND_POW2(nemb->mvs[1][_which_frame][1]*mvapw1-
+ nemb->mvs[2][_which_frame][1]*mvapw2,16,0x8000);
_mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
_mcenc->candidates[ncandidates][0],31);
_mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
@@ -327,17 +332,6 @@
return _best_err;
}
-oc_mcenc_ctx *oc_mcenc_alloc(oc_enc_ctx *_enc){
- oc_mcenc_ctx *mcenc;
- mcenc=_ogg_calloc(1,sizeof(*mcenc));
- mcenc->enc=_enc;
- return mcenc;
-}
-
-void oc_mcenc_free(oc_mcenc_ctx *_mcenc){
- _ogg_free(_mcenc);
-}
-
/*Perform a motion vector search for this macro block against a single
reference frame.
As a bonus, individual block motion vectors are computed as well, as much of
@@ -351,7 +345,7 @@
_error: Returns the prediction error for the macro block motion vector.
_error4mv: Returns sum of the prediction error for the individual block
motion vectors.*/
-void oc_mcenc_search(oc_mcenc_ctx *_mcenc,int _mbi,int _frame,
+static void oc_mcenc_search(oc_mcenc_ctx *_mcenc,int _mbi,int _frame,
char _bmvs[4][2],int *_error,int *_error4mv){
oc_mb_enc_info *embs;
oc_mb_enc_info *emb;
@@ -562,7 +556,8 @@
reference frame.
The actual motion vector is stored in the appropriate place in the
oc_mb_enc_info structure.
- Block-level motion vectors are not computed.
+ This is like the above oc_mcenc_search() routine, except that block-level
+ motion vectors are not computed.
_mcenc: The motion compensation context.
_mbi: The macro block index.
_frame: The frame to search, either OC_FRAME_PREV or OC_FRAME_GOLD.
@@ -682,29 +677,109 @@
return best_err;
}
-void oc_mcenc_analyze(oc_mcenc_ctx *_mcenc){
- oc_mb_enc_info *embs;
- oc_mb *mbs;
- ogg_int64_t nframes;
- int nmbs;
- int mbi;
- /*If there is no previous frame, then skip motion analysis: Every vector has
- been initialized to (0,0).*/
- if(_mcenc->enc->state.ref_frame_idx[OC_FRAME_PREV]<0)return;
- /*Set up the accelerated MV weights for previous frame prediction.*/
- _mcenc->mvapw1=(ogg_int32_t)1<<17;
- _mcenc->mvapw2=(ogg_int32_t)1<<16;
- mbs=_mcenc->enc->state.mbs;
- embs=_mcenc->enc->mbinfo;
- nmbs=_mcenc->enc->state.fplanes[0].nsbs<<2;
- for(mbi=0;mbi<nmbs;mbi++)if(mbs[mbi].mode!=OC_MODE_INVALID){
+/*A pipe to perform a motion vector search for each macro block.*/
+
+static int oc_mcenc_pipe_start(oc_enc_pipe_stage *_stage){
+ oc_enc_ctx *enc;
+ ogg_int64_t nframes;
+ int pli;
+ int mbi;
+ for(pli=0;pli<3;pli++)_stage->y_procd[pli]=0;
+ /*Move the motion vector predictors back a frame.
+ We could pipeline this, too, but it's probably not worth it.*/
+ enc=_stage->enc;
+ for(mbi=enc->state.fplanes[0].nsbs<<2;mbi-->0;){
oc_mb_enc_info *emb;
- emb=embs+mbi;
- oc_mcenc_search(_mcenc,mbi,OC_FRAME_PREV,emb->bmvs,&emb->aerror,
- &emb->aerror4mv);
+ emb=enc->mbinfo+mbi;
+ memmove(emb->mvs+1,emb->mvs,2*sizeof(emb->mvs[0]));
}
+ /*Set up the accelerated MV weights for previous frame prediction.*/
+ enc->mcenc->mvapw1[OC_FRAME_PREV]=(ogg_int32_t)1<<17;
+ enc->mcenc->mvapw2[OC_FRAME_PREV]=(ogg_int32_t)1<<16;
/*Set up the accelerated MV weights for golden frame prediction.*/
- nframes=_mcenc->enc->state.curframe_num-_mcenc->enc->state.keyframe_num;
- _mcenc->mvapw1=(ogg_int32_t)(nframes!=1?(nframes<<17)/(nframes-1):0);
- _mcenc->mvapw2=(ogg_int32_t)(nframes!=2?(nframes<<16)/(nframes-2):0);
+ nframes=enc->state.curframe_num-enc->state.keyframe_num;
+ enc->mcenc->mvapw1[OC_FRAME_GOLD]=(ogg_int32_t)(
+ nframes!=1?(nframes<<17)/(nframes-1):0);
+ enc->mcenc->mvapw2[OC_FRAME_GOLD]=(ogg_int32_t)(
+ nframes!=2?(nframes<<16)/(nframes-2):0);
+ return _stage->next!=NULL?(*_stage->next->pipe_start)(_stage->next):0;
}
+
+static int oc_mcenc_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+ oc_mcenc_ctx *mcenc;
+ int pli;
+ mcenc=_stage->enc->mcenc;
+ /*For now we ignore the chroma planes.*/
+ for(pli=1;pli<3;pli++)_stage->y_procd[pli]=_y_avail[pli];
+ /*Only do motion analysis if there is a previous frame; otherwise every
+ vector has already been initialized to (0,0).*/
+ if(mcenc->enc->state.ref_frame_idx[OC_FRAME_PREV]>=0){
+ int y_avail;
+ y_avail=_y_avail[0];
+ /*Round to a super-block row, except for the last one, which may be
+ incomplete.*/
+ if(y_avail<(int)mcenc->enc->state.info.frame_height)y_avail&=~31;
+ while(_stage->y_procd[0]<y_avail){
+ oc_mb_enc_info *embs;
+ oc_mb *mbs;
+ int mbi;
+ int mbi_end;
+ mbi=(_stage->y_procd[0]>>4)*mcenc->enc->state.fplanes[0].nhsbs;
+ mbi_end=mbi+mcenc->enc->state.fplanes[0].nhsbs<<1;
+ mbs=mcenc->enc->state.mbs;
+ embs=mcenc->enc->mbinfo;
+ for(;mbi<mbi_end;mbi++)if(mbs[mbi].mode!=OC_MODE_INVALID){
+ oc_mb_enc_info *emb;
+ emb=embs+mbi;
+ oc_mcenc_search(mcenc,mbi,OC_FRAME_PREV,emb->bmvs,&emb->aerror,
+ &emb->aerror4mv);
+ }
+ /*Chain to the next stage.*/
+ _stage->y_procd[0]=OC_MINI(_stage->y_procd[0]+32,y_avail);
+ if(_stage->next!=NULL){
+ int ret;
+ ret=_stage->next->pipe_proc(_stage->next,_stage->y_procd);
+ if(ret<0)return ret;
+ }
+ }
+ }
+ else{
+ _stage->y_procd[0]=_y_avail[0];
+ if(_stage->next!=NULL){
+ return _stage->next->pipe_proc(_stage->next,_stage->y_procd);
+ }
+ }
+ return 0;
+}
+
+static int oc_mcenc_pipe_end(oc_enc_pipe_stage *_stage){
+ return _stage->next!=NULL?(*_stage->next->pipe_end)(_stage->next):0;
+}
+
+/*Initialize the motion vector search stage of the pipeline.
+ _enc: The encoding context.*/
+static void oc_mcenc_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+ _stage->enc=_enc;
+ _stage->next=NULL;
+ _stage->pipe_start=oc_mcenc_pipe_start;
+ _stage->pipe_proc=oc_mcenc_pipe_process;
+ _stage->pipe_end=oc_mcenc_pipe_end;
+}
+
+oc_mcenc_ctx *oc_mcenc_alloc(oc_enc_ctx *_enc){
+ oc_mcenc_ctx *mcenc;
+ mcenc=_ogg_calloc(1,sizeof(*mcenc));
+ mcenc->enc=_enc;
+ oc_mcenc_pipe_init(&mcenc->pipe,_enc);
+ return mcenc;
+}
+
+void oc_mcenc_free(oc_mcenc_ctx *_mcenc){
+ _ogg_free(_mcenc);
+}
+
+oc_enc_pipe_stage *oc_mcenc_prepend_to_pipe(oc_mcenc_ctx *_mcenc,
+ oc_enc_pipe_stage *_next){
+ _mcenc->pipe.next=_next;
+ return &_mcenc->pipe;
+}
Modified: experimental/derf/theora-exp/lib/ocintrin.h
===================================================================
--- experimental/derf/theora-exp/lib/ocintrin.h 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/ocintrin.h 2005-09-18 00:58:06 UTC (rev 10030)
@@ -21,6 +21,12 @@
#define OC_MAXI(_a,_b) ((_a)<(_b)?(_b):(_a))
#define OC_MINI(_a,_b) ((_a)>(_b)?(_b):(_a))
+/*Clamps an integer into the given range.
+ If _a>_c, then the lower bound _a is respected over the upper bound _c (this
+ behavior is required to meet our documented API behavior).
+ _a: The lower bound.
+ _b: The value to clamp.
+ _c: The upper bound.*/
#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
#define OC_CLAMP255(_x) (oc_clamp255(_x))
/*Divides an integer by a power of two, truncating towards 0.
Modified: experimental/derf/theora-exp/lib/psych.c
===================================================================
--- experimental/derf/theora-exp/lib/psych.c 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/psych.c 2005-09-18 00:58:06 UTC (rev 10030)
@@ -1,7 +1,7 @@
#include <float.h>
#include <math.h>
#include <string.h>
-#include "encint.h"
+#include "psych.h"
/*This is where we attempt to model low-level vision processes, such as
sensitivities due to the way human eyes are constructed and how data flows
@@ -1699,7 +1699,7 @@
value might be.
In Part II, Annex E, Section 2, it discusses the "point-wise extended
non-linearity" on which Nadenau's IaCLA-2 method is based.
- There it suggests using a parameter a which would case k_L to have an
+ There it suggests using a parameter a which would cause k_L to have an
equivalent value of 2^{component_bit_depth-1}*1E-4.
Nadenau reports, however, that the standard specifies a value of 1E-4
directly.
@@ -1737,8 +1737,8 @@
a wavelet decomposition on the diagonal bands, which can lead to excessive
horizontal and vertical masking around diagonal edges and decreased
diagonal masking.
- The DCT has smaller frequency bands, than wavelets, and so we can get a
- slight improvement by moving some coefficients around in the high frequency
+ The DCT has smaller frequency bands than wavelets, and so we can get a slight
+ improvement by moving some coefficients around in the high frequency
channels.
Each coefficient uses a combination of nearby coefficients in the same DCT
@@ -1841,6 +1841,7 @@
/*This parameter, r_{csf}^\nu, compensates for the dyanmic range of the DCT
coefficients.*/
#define OC_MASK_R_CSF_NU (8.8388347648318440550E-2F)
+/*#define OC_MASK_R_CSF_NU (0.37892914162759952059F)*/
/*This parameter, k_L^\nu, determines the dynamic range of the neighborhood
masking.*/
@@ -1907,13 +1908,17 @@
/*Scratch space used by the psychovisual model.*/
struct oc_psych_ctx{
+ /*The CSF filter pipeline stage.*/
+ oc_enc_pipe_stage csf_pipe;
+ /*The spatial masking pipeline stage.*/
+ oc_enc_pipe_stage mask_pipe;
/*A single row of CSF-filtered coefficients (after vertical filtering).
This has OC_CSF_FILTER_SZ_MAX-1 blocks of padding on each side.*/
oc_weight_block *csf_row;
/*OC_MASK_WINDOW_SZ_MAX rows of CSF-filtered coefficients (after both
- vertical and horizontal filtering).
+ vertical and horizontal filtering) for each plane.
This has no padding on either side.*/
- oc_weight_block **csf_weights;
+ oc_weight_block **csf_weights[3];
/*The half-width of the CSF filters in the current filter banks, rounded
down.*/
int csf_filter_sizes[5][8];
@@ -1930,11 +1935,13 @@
/*Sums of CSF-weighted masking values in each masking group over
OC_MASK_WINDOW_SZ_MAX*2-1 rows, with OC_MASK_WINDOW_SZ_MAX-1 blocks of
padding on either side.*/
- oc_mask_block **mask_groups;
+ oc_mask_block **mask_groups[3];
/*Individual CSF-weighted masking values over OC_MASK_WINDOW_SZ_MAX rows.*/
- oc_weight_block **mask_weights;
+ oc_weight_block **mask_weights[3];
/*The encoding context.*/
oc_enc_ctx *enc;
+ /*The vertical delay for CSF filtering.*/
+ int vsize_max[3];
};
@@ -2043,6 +2050,383 @@
}
}
+static int oc_csf_pipe_start(oc_enc_pipe_stage *_stage){
+ oc_psych_ctx *psych;
+ int pli;
+ psych=_stage->enc->vbr->psych;
+ oc_psych_csf_filters_interpolate(psych,_stage->enc->vbr->qscale);
+ for(pli=0;pli<3;pli++){
+ int *vsizes;
+ int vfilti;
+ int filti;
+ _stage->y_procd[pli]=0;
+ vfilti=pli<<1;
+ vsizes=psych->csf_filter_sizes[vfilti];
+ /*Find the number of rows we have to perform DCTs in advance.*/
+ psych->vsize_max[pli]=0;
+ for(filti=0;filti<8;filti++){
+ psych->vsize_max[pli]=OC_MAXI(psych->vsize_max[pli],vsizes[filti]);
+ }
+ }
+ return _stage->next!=NULL?(*_stage->next->pipe_start)(_stage->next):0;
+}
+
+static int oc_csf_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+ oc_psych_ctx *psych;
+ int pli;
+ psych=_stage->enc->vbr->psych;
+ for(pli=0;pli<3;pli++){
+ int y_procd;
+ int y_avail;
+ /*Compute how far we can get in complete fragment rows.*/
+ y_procd=_stage->y_procd[pli];
+ /*Add a vsize_max[pli] delay.*/
+ y_avail=(_y_avail[pli]&~7)-(psych->vsize_max[pli]<<3);
+ /*Perform CSF filtering on any newly available rows.*/
+ while(y_avail>y_procd){
+ oc_fragment_plane *fplane;
+ oc_fragment *frags;
+ oc_fragment_enc_info *efrags;
+ oc_weight_block *maskw_row;
+ oc_weight_block *csfw_row;
+ oc_csf_filter *vfilters;
+ oc_csf_filter *hfilters;
+ oc_mask_block *maskg_row;
+ float *csfw;
+ float *maskw;
+ float *maskg;
+ int *vsizes;
+ int *hsizes;
+ int fragi_end;
+ int fragx;
+ int cfragi0;
+ int cfragi;
+ int cfragj;
+ int mfragj;
+ int vfilti;
+ int hfilti;
+ int filti;
+ int ci;
+ int wi;
+ vfilti=pli<<1;
+ vfilters=psych->csf_filters[vfilti];
+ vsizes=psych->csf_filter_sizes[vfilti];
+ hfilti=(pli<<1)-(pli>0);
+ hfilters=psych->csf_filters[hfilti];
+ hsizes=psych->csf_filter_sizes[hfilti];
+ frags=psych->enc->state.frags;
+ efrags=psych->enc->frinfo;
+ fplane=psych->enc->state.fplanes+pli;
+ fragi_end=fplane->froffset+fplane->nfrags;
+ cfragi0=fplane->froffset+(y_procd>>3)*fplane->nhfrags;
+ /*First, the vertical filter.*/
+ for(fragx=0,cfragi=cfragi0;fragx<fplane->nhfrags;fragx++,cfragi++){
+ int fragi_off;
+ csfw=psych->csf_row[fragx+OC_CSF_FILTER_SZ_MAX-1];
+ for(ci=0;ci<64;ci++){
+ filti=ci>>3;
+ csfw[ci]=vfilters[filti][0]*efrags[cfragi].dct_coeffs[ci];
+ fragi_off=fplane->nhfrags;
+ for(wi=vsizes[filti];wi>0;wi--){
+ int coeffs;
+ cfragj=cfragi-fragi_off;
+ coeffs=cfragj>=fplane->froffset?efrags[cfragj].dct_coeffs[ci]:0;
+ cfragj=cfragi+fragi_off;
+ if(cfragj<fragi_end)coeffs+=efrags[cfragj].dct_coeffs[ci];
+ csfw[ci]+=0.5F*vfilters[filti][wi]*coeffs;
+ fragi_off+=fplane->nhfrags;
+ }
+ }
+ }
+ /*Next, the horizontal filtering.*/
+ maskg_row=psych->mask_groups[pli][OC_MASK_WINDOW_SZ_MAX-1<<1];
+ maskw_row=psych->mask_weights[pli][OC_MASK_WINDOW_SZ_MAX-1];
+ csfw_row=psych->csf_weights[pli][OC_MASK_WINDOW_SZ_MAX-1];
+ memset(maskg_row[0],0,sizeof(maskg_row[0])*(
+ fplane->nhfrags+(OC_MASK_WINDOW_SZ_MAX-1<<1)));
+ for(fragx=0,cfragi=cfragi0;fragx<fplane->nhfrags;fragx++,cfragi++){
+ csfw=csfw_row[fragx];
+ if(frags[cfragi].invalid)memset(csfw,0,sizeof(oc_weight_block));
+ else{
+ cfragj=fragx+OC_CSF_FILTER_SZ_MAX-1;
+ mfragj=fragx+OC_MASK_WINDOW_SZ_MAX-1;
+ for(ci=0;ci<64;ci++){
+ filti=ci&7;
+ csfw[ci]=hfilters[filti][0]*psych->csf_row[cfragj][ci];
+ for(wi=hsizes[filti];wi>0;wi--){
+ csfw[ci]+=0.5F*hfilters[filti][wi]*(
+ psych->csf_row[cfragj-wi][ci]+psych->csf_row[cfragj+wi][ci]);
+ }
+ csfw[ci]=OC_FABSF(csfw[ci]);
+ }
+ maskw=maskw_row[fragx];
+ maskg=maskg_row[mfragj];
+ for(ci=1;ci<64;ci++){
+ maskw[ci]=OC_POWF(csfw[ci],OC_MASK_NU);
+ maskg[OC_MASK_GROUP[ci]]+=maskw[ci];
+ }
+ }
+ }
+ y_procd+=8;
+ _stage->y_procd[pli]=y_procd;
+ if(_stage->next!=NULL){
+ int ret;
+ ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ if(ret<0)return ret;
+ }
+ }
+ }
+ return 0;
+}
+
+static int oc_csf_pipe_end(oc_enc_pipe_stage *_stage){
+ oc_psych_ctx *psych;
+ int pli;
+ int ret;
+ int y_avail[3];
+ psych=_stage->enc->vbr->psych;
+ for(pli=0;pli<3;pli++){
+ y_avail[pli]=_stage->enc->state.input[pli].height+
+ (psych->vsize_max[pli]<<3);
+ }
+ ret=oc_csf_pipe_process(_stage,y_avail);
+ if(ret<0)return ret;
+ return _stage->next!=NULL?(*_stage->next->pipe_end)(_stage->next):0;
+}
+
+/*Initialize the CSF filter stage of the pipeline.
+ _enc: The encoding context.*/
+static void oc_csf_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+ _stage->enc=_enc;
+ _stage->next=NULL;
+ _stage->pipe_start=oc_csf_pipe_start;
+ _stage->pipe_proc=oc_csf_pipe_process;
+ _stage->pipe_end=oc_csf_pipe_end;
+}
+
+
+static int oc_mask_pipe_start(oc_enc_pipe_stage *_stage){
+ oc_psych_ctx *psych;
+ int pli;
+ psych=_stage->enc->vbr->psych;
+ for(pli=0;pli<3;pli++){
+ oc_fragment_plane *fplane;
+ int rowi;
+ _stage->y_procd[pli]=-(OC_MASK_WINDOW_SZ_MAX-1)<<3;
+ fplane=psych->enc->state.fplanes+pli;
+ /*Just clear out the mask group weights for the rows above the image.*/
+ for(rowi=OC_MASK_WINDOW_SZ_MAX-1;rowi<(OC_MASK_WINDOW_SZ_MAX-1<<1);rowi++){
+ memset(psych->mask_groups[pli][rowi][0],0,
+ sizeof(oc_mask_block)*(fplane->nhfrags+(OC_MASK_WINDOW_SZ_MAX-1<<1)));
+ }
+ }
+ return 0;
+}
+
+static int oc_mask_pipe_process(oc_enc_pipe_stage *_stage,int _y_avail[3]){
+ static const int o=OC_MASK_WINDOW_SZ_MAX-1;
+ oc_psych_ctx *psych;
+ int pli;
+ psych=_stage->enc->vbr->psych;
+ for(pli=0;pli<3;pli++){
+ int y_procd;
+ int y_avail;
+ /*Compute how far we can get in complete fragment rows.*/
+ y_procd=_stage->y_procd[pli];
+ /*Add an (OC_MASK_WINDOW_SZ_MAX-1) delay.*/
+ y_avail=(_y_avail[pli]&~7)-(o<<3);
+ /*Perform spatial masking on any newly available rows.*/
+ while(y_avail>y_procd){
+ oc_weight_block *csfw_row;
+ oc_weight_block *maskw_row;
+ oc_mask_block **mask_groups;
+ oc_mask_block *maskg_row;
+ int rowi;
+ mask_groups=psych->mask_groups[pli];
+ if(y_procd>0){
+ oc_fragment_plane *fplane;
+ oc_fragment_enc_info *efrag;
+ oc_mask_block group_sums;
+ float *csf_offset;
+ float *csfw;
+ float *maskw;
+ float fscale;
+ float pscale;
+ int fragx;
+ int mfragi;
+ int mfragj;
+ int qti;
+ int gi;
+ int wi;
+ int wj;
+ int ci;
+ pscale=OC_YCbCr_SCALE[psych->enc->state.info.colorspace][pli];
+ csf_offset=psych->csf_offsets[pli];
+ maskw_row=psych->mask_weights[pli][0];
+ csfw_row=psych->csf_weights[pli][0];
+ for(gi=0;gi<OC_MASK_NFULL_GROUPS;gi++){
+ group_sums[gi]=0;
+ for(wi=o-OC_MASK_WINDOW_SIZES[gi];wi<=o+OC_MASK_WINDOW_SIZES[gi];wi++){
+ for(wj=o;wj<o+OC_MASK_WINDOW_SIZES[gi];wj++){
+ group_sums[gi]+=mask_groups[wi][wj][gi];
+ }
+ }
+ }
+ fplane=psych->enc->state.fplanes+pli;
+ mfragi=fplane->froffset+(y_procd>>3)*fplane->nhfrags;
+ for(fragx=0;fragx<fplane->nhfrags;fragx++,mfragi++){
+ /*Add the parts of the group neighborhoods that are new.*/
+ mfragj=fragx+OC_MASK_WINDOW_SZ_MAX-1;
+ for(gi=0;gi<OC_MASK_NFULL_GROUPS;gi++){
+ wj=OC_MASK_WINDOW_SIZES[gi];
+ for(wi=o-wj;wi<=o+wj;wi++){
+ group_sums[gi]+=mask_groups[wi][mfragj+wj][gi];
+ }
+ }
+ /*These groups are so small it is not worth incremental updates.*/
+ for(;gi<OC_MASK_NGROUPS;gi++){
+ group_sums[gi]=mask_groups[o][mfragj][gi]+
+ mask_groups[o-1][mfragj][gi]+mask_groups[o+1][mfragj][gi]+
+ mask_groups[o][mfragj-1][gi]+mask_groups[o][mfragj+1][gi];
+ }
+ /*Mask the coefficients in this block.*/
+ /*An offset is added to the numerator and denominator to protect
+ against division by zero.
+ In effect, this is like a small addition of noise to the signal, but
+ it should be well below the visual threshold.
+ This is the best solution I could come up with to handle the problem
+ of 0's in the intra DCT coefficients that, because of motion
+ compensation, are not 0 in the inter DCT coefficients and thus need
+ a valid weight.
+ The DC coefficient has a different offset added than the others,
+ because the pixel values have 128 subtracted from them before the
+ DCT is performed, which offsets the DC coefficient by 4096.*/
+ efrag=psych->enc->frinfo+mfragi;
+ csfw=psych->csf_row[fragx];
+ maskw=maskw_row[fragx];
+ /*Compute the scaling value for this fragment.*/
+ fscale=pscale*efrag->imp_weight/psych->enc->vbr->qscale;
+ /*The DC coefficient is not masked.*/
+ efrag->tols[0]=(ogg_uint16_t)OC_MINI(65535,(int)(
+ (OC_CSF_DC_SHIFT+abs(efrag->dct_coeffs[0]))/
+ (fscale*(csf_offset[0]+csfw[0]))));
+ psych->enc->vbr->dc_tol_mins[pli]=(unsigned)OC_MINI(
+ psych->enc->vbr->dc_tol_mins[pli],efrag->tols[0]);
+ /*The remaining coefficients are masked.*/
+ for(ci=1;ci<64;ci++){
+ float mask;
+ gi=OC_MASK_GROUP[ci];
+ mask=group_sums[gi]-maskw[ci];
+ efrag->tols[ci]=(ogg_uint16_t)OC_MINI(65535,(int)(
+ ((OC_CSF_NOISE_LEVEL+abs(efrag->dct_coeffs[ci]))*
+ (1+OC_MASK_WEIGHTS[gi]*mask))/
+ (fscale*(csf_offset[ci]+csfw[ci]))));
+ }
+ /*Select minimum qi values for each quantizer type.*/
+ for(qti=2;qti-->0;){
+ unsigned qmin;
+ int qi_min;
+ /*This is the minimum quantizer Theora allows.
+ Don't inflate the qi unnecessarily if we have a tolerance less
+ than this.*/
+ qmin=OC_AC_QUANT_MIN[qti];
+ qi_min=qti?0:efrag->qi_min[1];
+ for(ci=0;qi_min<63;qi_min++){
+ while(psych->enc->state.dequant_tables[qti][pli][qi_min][ci]<=
+ OC_MAXI(2U*efrag->tols[ci],qmin)&&++ci<64);
+ if(ci==64)break;
+ }
+ efrag->qi_min[qti]=(unsigned char)qi_min;
+ }
+ #if 0
+ /*Now undo all the work we did above and just use a constant quantizer
+ value for testing purposes.*/
+ efrag->qi_min[0]=efrag->qi_min[1]=_psych->enc->state.info.quality;
+ for(ci=0;ci<64;ci++){
+ efrag->tols[ci]=OC_MINI(
+ psych->enc->state.dequant_tables[0][pli][efrag->qi_min[0]][ci],
+ psych->enc->state.dequant_tables[1][pli][efrag->qi_min[1]][ci])+
+ 1>>1;
+ }
+ #endif
+ /*Remove the parts of the group neighborhoods that are old.*/
+ for(gi=0;gi<OC_MASK_NFULL_GROUPS;gi++){
+ wj=OC_MASK_WINDOW_SIZES[gi];
+ for(wi=o-wj;wi<=o+wj;wi++){
+ group_sums[gi]-=mask_groups[wi][mfragj-wj][gi];
+ }
+ }
+ }
+ }
+ /*Move the sliding windows.
+ The nice thing about these manually allocated 2D arrays is that we can
+ move rows around just by moving around the initial pointers to them,
+ not actually copying their contents.*/
+ maskg_row=mask_groups[0];
+ for(rowi=0;rowi<OC_MASK_WINDOW_SZ_MAX-1<<1;rowi++){
+ mask_groups[rowi]=mask_groups[rowi+1];
+ }
+ mask_groups[OC_MASK_WINDOW_SZ_MAX-1<<1]=maskg_row;
+ maskw_row=psych->mask_weights[pli][0];
+ csfw_row=psych->csf_weights[pli][0];
+ for(rowi=0;rowi<OC_MASK_WINDOW_SZ_MAX-1;rowi++){
+ psych->mask_weights[pli][rowi]=psych->mask_weights[pli][rowi+1];
+ psych->csf_weights[pli][rowi]=psych->csf_weights[pli][rowi+1];
+ }
+ psych->mask_weights[pli][OC_MASK_WINDOW_SZ_MAX-1]=maskw_row;
+ psych->csf_weights[pli][OC_MASK_WINDOW_SZ_MAX-1]=csfw_row;
+ y_procd+=8;
+ }
+ _stage->y_procd[pli]=y_procd;
+ }
+ return 0;
+}
+
+static int oc_mask_pipe_end(oc_enc_pipe_stage *_stage){
+ oc_psych_ctx *psych;
+ int y_avail[3];
+ int rowi;
+ int pli;
+ int ret;
+ psych=_stage->enc->vbr->psych;
+ /*To finish up this stage, we need to keep the sliding windows sliding, so
+ we make a separate call to pipe_process() for each row.*/
+ for(pli=0;pli<3;pli++)y_avail[pli]=_stage->enc->state.input[pli].height;
+ for(rowi=OC_MASK_WINDOW_SZ_MAX-1;rowi-->0;){
+ for(pli=0;pli<3;pli++){
+ oc_fragment_plane *fplane;
+ fplane=psych->enc->state.fplanes+pli;
+ /*Just clear out the mask group weights for the rows below the image.*/
+ memset(psych->mask_groups[pli][OC_MASK_WINDOW_SZ_MAX-1<<1][0],0,
+ sizeof(oc_mask_block)*(fplane->nhfrags+(OC_MASK_WINDOW_SZ_MAX-1<<1)));
+ y_avail[pli]+=8;
+ }
+ ret=oc_mask_pipe_process(_stage,y_avail);
+ if(ret<0)return ret;
+ }
+ /*Because dc_tol_mins[] is not complete until each plane has been completely
+ processed, we wait until then to execute the next pipeline stage.*/
+ if(_stage->next!=NULL){
+ ret=(*_stage->next->pipe_start)(_stage->next);
+ if(ret<0)return ret;
+ ret=(*_stage->next->pipe_proc)(_stage->next,_stage->y_procd);
+ if(ret<0)return ret;
+ return (*_stage->next->pipe_end)(_stage->next);
+ }
+ return 0;
+}
+
+/*Initialize the spatial masking stage of the pipeline.
+ _enc: The encoding context.*/
+static void oc_mask_pipe_init(oc_enc_pipe_stage *_stage,oc_enc_ctx *_enc){
+ _stage->enc=_enc;
+ _stage->next=NULL;
+ _stage->pipe_start=oc_mask_pipe_start;
+ _stage->pipe_proc=oc_mask_pipe_process;
+ _stage->pipe_end=oc_mask_pipe_end;
+}
+
+#if 0
static void oc_psych_scan_plane(oc_psych_ctx *_psych,int _pli){
oc_csf_filter *vfilters;
oc_csf_filter *hfilters;
@@ -2081,7 +2465,7 @@
int k;
csf_offset=_psych->csf_offsets[_pli];
/*Initialize the minimum psychovisual tolerance for the DC coefficient.*/
- _psych->enc->dc_tol_mins[_pli]=32767;
+ _psych->enc->vbr->dc_tol_mins[_pli]=32767;
/*Select the filter sets we're going to use.*/
vfilti=_pli<<1;
hfilti=_pli>0?(_pli<<1)-1:0;
@@ -2226,13 +2610,13 @@
csfw=_psych->csf_weights[0][fragx];
maskw=maskw_row[fragx];
/*Compute the scaling value for this fragment.*/
- fscale=pscale*efrag->imp_weight/_psych->enc->qscale;
+ fscale=pscale*efrag->imp_weight/_psych->enc->vbr->qscale;
/*The DC coefficient is not masked.*/
efrag->tols[0]=(ogg_uint16_t)OC_MINI(65535,(int)(
(OC_CSF_DC_SHIFT+abs(efrag->dct_coeffs[0]))/
(fscale*(csf_offset[0]+csfw[0]))));
- _psych->enc->dc_tol_mins[_pli]=(unsigned)OC_MINI(
- _psych->enc->dc_tol_mins[_pli],efrag->tols[0]);
+ _psych->enc->vbr->dc_tol_mins[_pli]=(unsigned)OC_MINI(
+ _psych->enc->vbr->dc_tol_mins[_pli],efrag->tols[0]);
/*The remaining coefficients are masked.*/
for(i=1;i<64;i++){
float mask;
@@ -2299,38 +2683,58 @@
_psych->csf_weights[OC_MASK_WINDOW_SZ_MAX-1]=csfw_row;
}
}
+#endif
oc_psych_ctx *oc_psych_alloc(oc_enc_ctx *_enc){
oc_psych_ctx *psych;
int nhfrags;
+ int pli;
nhfrags=_enc->state.fplanes[0].nhfrags;
psych=(oc_psych_ctx *)_ogg_malloc(sizeof(*psych));
psych->csf_row=(oc_weight_block *)_ogg_calloc(
(nhfrags+(OC_CSF_FILTER_SZ_MAX-1<<1)),sizeof(psych->csf_row[0]));
- psych->csf_weights=(oc_weight_block **)oc_malloc_2d(
- OC_MASK_WINDOW_SZ_MAX,nhfrags,sizeof(psych->csf_weights[0][0]));
- psych->mask_groups=(oc_mask_block **)oc_malloc_2d(
- (OC_MASK_WINDOW_SZ_MAX<<1)-1,nhfrags+(OC_MASK_WINDOW_SZ_MAX-1<<1),
- sizeof(psych->mask_groups[0][0]));
- psych->mask_weights=(oc_weight_block **)oc_malloc_2d(
- OC_MASK_WINDOW_SZ_MAX,nhfrags,sizeof(psych->mask_weights[0][0]));
+ for(pli=0;pli<3;pli++){
+ nhfrags=_enc->state.fplanes[pli].nhfrags;
+ psych->csf_weights[pli]=(oc_weight_block **)oc_malloc_2d(
+ OC_MASK_WINDOW_SZ_MAX,nhfrags,sizeof(oc_weight_block));
+ psych->mask_groups[pli]=(oc_mask_block **)oc_malloc_2d(
+ (OC_MASK_WINDOW_SZ_MAX<<1)-1,nhfrags+(OC_MASK_WINDOW_SZ_MAX-1<<1),
+ sizeof(oc_mask_block));
+ psych->mask_weights[pli]=(oc_weight_block **)oc_malloc_2d(
+ OC_MASK_WINDOW_SZ_MAX,nhfrags,sizeof(oc_weight_block));
+ }
psych->enc=_enc;
+ /*Initialize our pipeline stages.*/
+ oc_csf_pipe_init(&psych->csf_pipe,_enc);
+ oc_mask_pipe_init(&psych->mask_pipe,_enc);
+ psych->csf_pipe.next=&psych->mask_pipe;
return psych;
}
void oc_psych_free(oc_psych_ctx *_psych){
if(_psych!=NULL){
+ int pli;
_ogg_free(_psych->csf_row);
- oc_free_2d((void **)_psych->csf_weights);
- oc_free_2d((void **)_psych->mask_groups);
- oc_free_2d((void **)_psych->mask_weights);
+ for(pli=0;pli<3;pli++){
+ oc_free_2d((void **)_psych->csf_weights[pli]);
+ oc_free_2d((void **)_psych->mask_groups[pli]);
+ oc_free_2d((void **)_psych->mask_weights[pli]);
+ }
_ogg_free(_psych);
}
}
+oc_enc_pipe_stage *oc_psych_prepend_to_pipe(oc_psych_ctx *_psych,
+ oc_enc_pipe_stage *_next){
+ _psych->mask_pipe.next=_next;
+ return &_psych->csf_pipe;
+}
+
+#if 0
void oc_psych_scan(oc_psych_ctx *_psych,float _contrast){
int pli;
oc_psych_csf_filters_interpolate(_psych,_contrast);
for(pli=0;pli<3;pli++)oc_psych_scan_plane(_psych,pli);
}
+#endif
Modified: experimental/derf/theora-exp/lib/psych.h
===================================================================
--- experimental/derf/theora-exp/lib/psych.h 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/lib/psych.h 2005-09-18 00:58:06 UTC (rev 10030)
@@ -1,5 +1,6 @@
#if !defined(_psych_H)
# define _psych_H (1)
+# include "encvbr.h"
/*The assumed screen resolution vs. viewing distance.
This is taken to be constant under the assumption that viewers will sit
Modified: experimental/derf/theora-exp/unix/Makefile
===================================================================
--- experimental/derf/theora-exp/unix/Makefile 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/unix/Makefile 2005-09-18 00:58:06 UTC (rev 10030)
@@ -78,8 +78,10 @@
enquant.c \
fdct.c \
huffenc.c \
+mcenc.c \
+encmsc.c \
+encvbr.c \
impmap.c \
-mcenc.c \
psych.c \
LIBTHEORAENC_CHEADERS = \
@@ -88,6 +90,7 @@
enquant.h \
fdct.h \
huffenc.h \
+encvbr.h \
psych.h \
DUMP_VIDEO_CSOURCES = dump_video.c
Modified: experimental/derf/theora-exp/win32/msvc60/dump_video.dsp
===================================================================
--- experimental/derf/theora-exp/win32/msvc60/dump_video.dsp 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/win32/msvc60/dump_video.dsp 2005-09-18 00:58:06 UTC (rev 10030)
@@ -74,7 +74,7 @@
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib libpng.lib zlib.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib libpngd.lib zlibd_static.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
!ENDIF
Modified: experimental/derf/theora-exp/win32/msvc60/encoder_example.dsp
===================================================================
--- experimental/derf/theora-exp/win32/msvc60/encoder_example.dsp 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/win32/msvc60/encoder_example.dsp 2005-09-18 00:58:06 UTC (rev 10030)
@@ -39,9 +39,10 @@
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release_encoder_example"
# PROP Intermediate_Dir "Release_encoder_example"
+# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /GX /O2 /I "../../../../../trunk/ogg/include" /I "../../../../../trunk/vorbis/include" /I "../../include" /I "../compatibility" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /U "OC_444_MODE" /U "OC_422_MODE" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /I "../../../../../trunk/ogg/include" /I "../../../../../trunk/vorbis/include" /I "../../include" /I "../compatibility" /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D GETOPT_API= /U "OC_444_MODE" /U "OC_422_MODE" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
@@ -65,7 +66,7 @@
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
-# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../../../../trunk/vorbis/include" /I "../../include" /I "../compatibility" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D GETOPT_API= /U "OC_444_MODE" /U "OC_422_MODE" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../../../../trunk/vorbis/include" /I "../../include" /I "../compatibility" /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D GETOPT_API= /U "OC_444_MODE" /U "OC_422_MODE" /YX /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
@@ -73,7 +74,7 @@
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib libpng.lib zlib.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib libpngd.lib zlibd_static.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
!ENDIF
Modified: experimental/derf/theora-exp/win32/msvc60/theorabase_static.dsp
===================================================================
--- experimental/derf/theora-exp/win32/msvc60/theorabase_static.dsp 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/win32/msvc60/theorabase_static.dsp 2005-09-18 00:58:06 UTC (rev 10030)
@@ -41,7 +41,7 @@
# PROP Intermediate_Dir "Release_theorabase_static"
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /GX /O2 /Ob2 /I "../../../../../trunk/ogg/include" /I "../../include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /Ob2 /I "../../../../../trunk/ogg/include" /I "../../include" /D "NDEBUG" /D "WIN32" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
@@ -64,7 +64,7 @@
# PROP Intermediate_Dir "Debug_theorabase_static"
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
-# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../include" /D "_DEBUG" /D "WIN32" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
@@ -113,6 +113,10 @@
# PROP Default_Filter "h;hpp;hxx;hm;inl"
# Begin Source File
+SOURCE=..\..\include\theora\codec.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\lib\dct.h
# End Source File
# Begin Source File
Modified: experimental/derf/theora-exp/win32/msvc60/theoradec_static.dsp
===================================================================
--- experimental/derf/theora-exp/win32/msvc60/theoradec_static.dsp 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/win32/msvc60/theoradec_static.dsp 2005-09-18 00:58:06 UTC (rev 10030)
@@ -105,6 +105,10 @@
# PROP Default_Filter "h;hpp;hxx;hm;inl"
# Begin Source File
+SOURCE=..\..\include\theora\codec.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\lib\decint.h
# End Source File
# Begin Source File
@@ -137,7 +141,7 @@
# End Source File
# Begin Source File
-SOURCE=..\..\include\theora\theora.h
+SOURCE=..\..\include\theora\theoradec.h
# End Source File
# End Group
# End Target
Modified: experimental/derf/theora-exp/win32/msvc60/theoraenc_static.dsp
===================================================================
--- experimental/derf/theora-exp/win32/msvc60/theoraenc_static.dsp 2005-09-18 00:30:40 UTC (rev 10029)
+++ experimental/derf/theora-exp/win32/msvc60/theoraenc_static.dsp 2005-09-18 00:58:06 UTC (rev 10030)
@@ -41,7 +41,7 @@
# PROP Intermediate_Dir "Release_theoraenc_static"
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /GX /O2 /Ob2 /I "../../../../../trunk/ogg/include" /I "../../include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /Ob2 /I "../../../../../trunk/ogg/include" /I "../../include" /D "NDEBUG" /D "WIN32" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
@@ -64,7 +64,7 @@
# PROP Intermediate_Dir "Debug_theoraenc_static"
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
-# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W4 /Gm /GX /ZI /Od /I "../../../../../trunk/ogg/include" /I "../../include" /D "_DEBUG" /D "WIN32" /D "_MBCS" /D "_LIB" /U "OC_DUMP_IMAGES" /YX /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
@@ -93,10 +93,18 @@
# End Source File
# Begin Source File
+SOURCE=..\..\lib\encmsc.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\lib\encode.c
# End Source File
# Begin Source File
+SOURCE=..\..\lib\encvbr.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\lib\enquant.c
# End Source File
# Begin Source File
@@ -125,6 +133,10 @@
# PROP Default_Filter "h;hpp;hxx;hm;inl"
# Begin Source File
+SOURCE=..\..\include\theora\codec.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\lib\dct.h
# End Source File
# Begin Source File
@@ -133,6 +145,10 @@
# End Source File
# Begin Source File
+SOURCE=..\..\lib\encvbr.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\lib\enquant.h
# End Source File
# Begin Source File
@@ -165,7 +181,7 @@
# End Source File
# Begin Source File
-SOURCE=..\..\include\theora\theora.h
+SOURCE=..\..\include\theora\theoraenc.h
# End Source File
# End Group
# End Target
More information about the commits mailing list