[xiph-commits] r13941 - in branches/theora-multithread: examples
include/theora lib lib/enc
portavales at svn.xiph.org
portavales at svn.xiph.org
Sat Oct 6 15:15:56 PDT 2007
Author: portavales
Date: 2007-10-06 15:15:55 -0700 (Sat, 06 Oct 2007)
New Revision: 13941
Modified:
branches/theora-multithread/examples/encoder_example.c
branches/theora-multithread/include/theora/theora.h
branches/theora-multithread/lib/Makefile.am
branches/theora-multithread/lib/enc/codec_internal.h
branches/theora-multithread/lib/enc/encode.c
branches/theora-multithread/lib/enc/encoder_toplevel.c
Log:
First Attempt to parallelize the Encoder: This first attempt only parallelized the Motion Vector Search part of the algorithm, using pthreads. Cheers, Felipe Portavales Goldstein
Modified: branches/theora-multithread/examples/encoder_example.c
===================================================================
--- branches/theora-multithread/examples/encoder_example.c 2007-10-06 20:43:25 UTC (rev 13940)
+++ branches/theora-multithread/examples/encoder_example.c 2007-10-06 22:15:55 UTC (rev 13941)
@@ -89,6 +89,7 @@
{"noise-sensitivity",required_argument,NULL,'n'},
{"sharpness",required_argument,NULL,'m'},
{"keyframe-freq",required_argument,NULL,'k'},
+ {"number-of-threads",required_argument,NULL,'t'},
{NULL,0,NULL,0}
};
@@ -120,6 +121,8 @@
int sharpness=0;
int keyframe_frequency=64;
+int numThreads=4;
+
static void usage(void){
fprintf(stderr,
"Usage: encoder_example [options] [audio_file] video_file\n\n"
@@ -160,6 +163,8 @@
" reduce file size but resulting video\n"
" is blurrier, defaults to 0)\n"
" -k --keyframe-freq <n> Keyframe frequency from 8 to 1000\n"
+ " -t --number-of-threads <n> The number of threads to run the\n"
+ " Motion Vector Search\n"
"encoder_example accepts only uncompressed RIFF WAV format audio and\n"
"YUV4MPEG2 uncompressed video.\n\n");
exit(1);
@@ -732,6 +737,14 @@
}
break;
+ case 't':
+ numThreads=rint(atof(optarg));
+ if(numThreads<1){
+ fprintf(stderr,"Illegal number of Threads\n");
+ exit(1);
+ }
+ break;
+
default:
usage();
}
@@ -806,9 +819,13 @@
ti.noise_sensitivity=noise_sensitivity;
ti.sharpness=sharpness;
+
theora_encode_init(&td,&ti);
theora_info_clear(&ti);
+ theora_control(&td,TH_ENCCTL_SET_NUM_THREADS,&numThreads,sizeof(numThreads));
+
+
/* initialize Vorbis too, assuming we have audio to compress. */
if(audio){
vorbis_info_init(&vi);
@@ -899,7 +916,7 @@
fwrite(og.body,1,og.body_len,outfile);
}
}
-
+ fprintf(stderr,"Number of Threads: %d\n", numThreads);
/* setup complete. Raw processing loop */
fprintf(stderr,"Compressing....\n");
while(1){
Modified: branches/theora-multithread/include/theora/theora.h
===================================================================
--- branches/theora-multithread/include/theora/theora.h 2007-10-06 20:43:25 UTC (rev 13940)
+++ branches/theora-multithread/include/theora/theora.h 2007-10-06 22:15:55 UTC (rev 13941)
@@ -467,6 +467,12 @@
* \retval TH_EINVAL \a buf_sz is not <tt>sizeof(#th_cqi_cfg)</tt>.
* \retval TH_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SETUP_CQI (18)
+/**Set the number of Threads running the Motion Vector Search
+ *
+ * \param[in] buf int: The number of threads.
+ */
+#define TH_ENCCTL_SET_NUM_THREADS (20)
+
/*@}*/
#define OC_FAULT -1 /**< General failure */
Modified: branches/theora-multithread/lib/Makefile.am
===================================================================
--- branches/theora-multithread/lib/Makefile.am 2007-10-06 20:43:25 UTC (rev 13940)
+++ branches/theora-multithread/lib/Makefile.am 2007-10-06 22:15:55 UTC (rev 13941)
@@ -134,7 +134,7 @@
libtheora_la_CFLAGS = $(OGG_CFLAGS)
-libtheora_la_LDFLAGS = -version-info @TH_LIB_CURRENT@:@TH_LIB_REVISION@:@TH_LIB_AGE@ @THEORA_LDFLAGS@
+libtheora_la_LDFLAGS = -lpthread -version-info @TH_LIB_CURRENT@:@TH_LIB_REVISION@:@TH_LIB_AGE@ @THEORA_LDFLAGS@
libtheora_la_LIBADD = $(OGG_LIBS)
debug:
Modified: branches/theora-multithread/lib/enc/codec_internal.h
===================================================================
--- branches/theora-multithread/lib/enc/codec_internal.h 2007-10-06 20:43:25 UTC (rev 13940)
+++ branches/theora-multithread/lib/enc/codec_internal.h 2007-10-06 22:15:55 UTC (rev 13941)
@@ -716,6 +716,7 @@
DspFunctions dsp; /* Selected functions for this platform */
+ int numThreads; /* Number of Threads to run the MV Search */
} CP_INSTANCE;
#define clamp255(x) ((unsigned char)((((x)<0)-1) & ((x) | -((x)>255))))
Modified: branches/theora-multithread/lib/enc/encode.c
===================================================================
--- branches/theora-multithread/lib/enc/encode.c 2007-10-06 20:43:25 UTC (rev 13940)
+++ branches/theora-multithread/lib/enc/encode.c 2007-10-06 22:15:55 UTC (rev 13941)
@@ -15,6 +15,7 @@
********************************************************************/
+#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include "codec_internal.h"
@@ -984,186 +985,183 @@
return 0;
}
-static void AddMotionVector(CP_INSTANCE *cpi,
- MOTION_VECTOR *ThisMotionVector) {
- cpi->MVList[cpi->MvListCount].x = ThisMotionVector->x;
- cpi->MVList[cpi->MvListCount].y = ThisMotionVector->y;
- cpi->MvListCount++;
+
+typedef struct threadMvList {
+ MOTION_VECTOR mv;
+ unsigned char valid;
+} THREAD_MV_LIST;
+
+typedef struct thread_param {
+ CP_INSTANCE *cpi;
+ ogg_uint32_t SBStartRow;
+ ogg_uint32_t SBEndRow;
+ ogg_uint32_t SBCols;
+ ogg_uint32_t PixelsPerLine;
+ THREAD_MV_LIST* mvList;
+ ogg_uint32_t *InterError;
+ ogg_uint32_t *IntraError;
+} THREAD_PARAM;
+
+static void AddMotionVector(THREAD_MV_LIST* mvList,
+ ogg_int32_t SB, ogg_int32_t MB, ogg_int32_t B,
+ MOTION_VECTOR *ThisMotionVector) {
+ const ogg_int32_t FragIndex = SB*(4*4) + MB*(2*2) + B;
+ /* ATTENTION: This mvList is not in raster order;
+ it is in Hilbert curve order.
+ This is just to keep the added Motion Vectors in the
+ correct order of insertion */
+ mvList[FragIndex].mv.x = ThisMotionVector->x;
+ mvList[FragIndex].mv.y = ThisMotionVector->y;
+ mvList[FragIndex].valid = 1;
}
static void SetFragMotionVectorAndMode(CP_INSTANCE *cpi,
- ogg_int32_t FragIndex,
- MOTION_VECTOR *ThisMotionVector){
+ ogg_int32_t FragIndex,
+ MOTION_VECTOR *ThisMotionVector,
+ unsigned char MBCodingMode){
/* Note the coding mode and vector for each block */
cpi->pb.FragMVect[FragIndex].x = ThisMotionVector->x;
cpi->pb.FragMVect[FragIndex].y = ThisMotionVector->y;
- cpi->pb.FragCodingMethod[FragIndex] = cpi->MBCodingMode;
+ cpi->pb.FragCodingMethod[FragIndex] = MBCodingMode;
}
static void SetMBMotionVectorsAndMode(CP_INSTANCE *cpi,
- ogg_int32_t YFragIndex,
- ogg_int32_t UFragIndex,
- ogg_int32_t VFragIndex,
- MOTION_VECTOR *ThisMotionVector){
- SetFragMotionVectorAndMode(cpi, YFragIndex, ThisMotionVector);
- SetFragMotionVectorAndMode(cpi, YFragIndex + 1, ThisMotionVector);
+ ogg_int32_t YFragIndex,
+ ogg_int32_t UFragIndex,
+ ogg_int32_t VFragIndex,
+ MOTION_VECTOR *ThisMotionVector,
+ unsigned char MBCodingMode){
+ SetFragMotionVectorAndMode(cpi, YFragIndex, ThisMotionVector, MBCodingMode);
+ SetFragMotionVectorAndMode(cpi, YFragIndex + 1, ThisMotionVector, MBCodingMode);
SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments,
- ThisMotionVector);
+ ThisMotionVector, MBCodingMode);
SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments + 1,
- ThisMotionVector);
- SetFragMotionVectorAndMode(cpi, UFragIndex, ThisMotionVector);
- SetFragMotionVectorAndMode(cpi, VFragIndex, ThisMotionVector);
+ ThisMotionVector, MBCodingMode);
+ SetFragMotionVectorAndMode(cpi, UFragIndex, ThisMotionVector, MBCodingMode);
+ SetFragMotionVectorAndMode(cpi, VFragIndex, ThisMotionVector, MBCodingMode);
}
-ogg_uint32_t PickModes(CP_INSTANCE *cpi,
- ogg_uint32_t SBRows, ogg_uint32_t SBCols,
- ogg_uint32_t PixelsPerLine,
- ogg_uint32_t *InterError, ogg_uint32_t *IntraError) {
+
+void* ThreadPickModes(void *arg) {
+ THREAD_PARAM* p = (THREAD_PARAM*)arg;
+
+ CP_INSTANCE *cpi = p->cpi;
+ const ogg_uint32_t SBStartRow = p->SBStartRow;
+ const ogg_uint32_t SBEndRow = p->SBEndRow;
+ const ogg_uint32_t SBCols = p->SBCols;
+ const ogg_uint32_t PixelsPerLine = p->PixelsPerLine;
+ THREAD_MV_LIST* mvList = p->mvList;
+ ogg_uint32_t *InterError = p->InterError;
+ ogg_uint32_t *IntraError = p->IntraError;
+
+
+ ogg_uint32_t SBrow; /* Super-Block row number */
+ ogg_uint32_t SBcol; /* Super-Block row number */
+ ogg_uint32_t SB; /* Super-Block indice */
+ ogg_uint32_t MB, B; /* Macro-Block, Block indices */
+
+ int MBCodedFlag;
+
ogg_int32_t YFragIndex;
ogg_int32_t UFragIndex;
ogg_int32_t VFragIndex;
- ogg_uint32_t MB, B; /* Macro-Block, Block indices */
- ogg_uint32_t SBrow; /* Super-Block row number */
- ogg_uint32_t SBcol; /* Super-Block row number */
- ogg_uint32_t SB=0; /* Super-Block index, initialised to first
- of this component */
+ ogg_uint32_t UVRow;
+ ogg_uint32_t UVColumn;
+ ogg_uint32_t UVFragOffset;
+
+
+ ogg_uint32_t BestError; /* Best error so far. */
ogg_uint32_t MBIntraError; /* Intra error for macro block */
ogg_uint32_t MBGFError; /* Golden frame macro block error */
- ogg_uint32_t MBGF_MVError; /* Golden frame plus MV error */
- ogg_uint32_t LastMBGF_MVError; /* Golden frame error with
- last used GF motion
- vector. */
ogg_uint32_t MBInterError; /* Inter no MV macro block error */
+
ogg_uint32_t MBLastInterError; /* Inter with last used MV */
ogg_uint32_t MBPriorLastInterError; /* Inter with prior last MV */
+
+
+ MOTION_VECTOR InterMVect; /* storage for motion vector */
ogg_uint32_t MBInterMVError; /* Inter MV macro block error */
- ogg_uint32_t MBInterMVExError; /* Inter MV (exhaustive
- search) macro block error */
+
+ MOTION_VECTOR InterMVectEx; /* storage for motion vector result
+ from exhaustive search */
+ ogg_uint32_t MBInterMVExError; /* Inter MV (exhaustive
+ search) macro block error */
+
+ ogg_uint32_t MBGF_MVError; /* Golden frame plus MV error */
+ ogg_uint32_t LastMBGF_MVError; /* Golden frame error with
+ last used GF motion
+ vector. */
+ MOTION_VECTOR GFMVect; /* storage for motion vector */
+
+
ogg_uint32_t MBInterFOURMVError; /* Inter MV error when using 4
motion vectors per macro
block */
- ogg_uint32_t BestError; /* Best error so far. */
-
MOTION_VECTOR FourMVect[6]; /* storage for last used vectors (one
entry for each block in MB) */
+
MOTION_VECTOR LastInterMVect; /* storage for last used Inter frame
MB motion vector */
MOTION_VECTOR PriorLastInterMVect; /* storage for prior last used
Inter frame MB motion vector */
- MOTION_VECTOR TmpMVect; /* Temporary MV storage */
MOTION_VECTOR LastGFMVect; /* storage for last used Golden
Frame MB motion vector */
- MOTION_VECTOR InterMVect; /* storage for motion vector */
- MOTION_VECTOR InterMVectEx; /* storage for motion vector result
- from exhaustive search */
- MOTION_VECTOR GFMVect; /* storage for motion vector */
MOTION_VECTOR ZeroVect;
+ ZeroVect.x = 0;
+ ZeroVect.y = 0;
- ogg_uint32_t UVRow;
- ogg_uint32_t UVColumn;
- ogg_uint32_t UVFragOffset;
-
- int MBCodedFlag;
- unsigned char QIndex;
-
- /* initialize error scores */
- *InterError = 0;
- *IntraError = 0;
-
- /* clear down the default motion vector. */
- cpi->MvListCount = 0;
- FourMVect[0].x = 0;
- FourMVect[0].y = 0;
- FourMVect[1].x = 0;
- FourMVect[1].y = 0;
- FourMVect[2].x = 0;
- FourMVect[2].y = 0;
- FourMVect[3].x = 0;
- FourMVect[3].y = 0;
- FourMVect[4].x = 0;
- FourMVect[4].y = 0;
- FourMVect[5].x = 0;
- FourMVect[5].y = 0;
+ /* Reset the last MVs when the row changes.
+ This must be done because the SBs are not traversed in raster order */
LastInterMVect.x = 0;
LastInterMVect.y = 0;
PriorLastInterMVect.x = 0;
PriorLastInterMVect.y = 0;
LastGFMVect.x = 0;
LastGFMVect.y = 0;
- InterMVect.x = 0;
- InterMVect.y = 0;
- GFMVect.x = 0;
- GFMVect.y = 0;
- ZeroVect.x = 0;
- ZeroVect.y = 0;
+ for( SBrow=SBStartRow; SBrow < SBEndRow; SBrow++){
+ for( SBcol=0; SBcol < SBCols; SBcol++){
- QIndex = (unsigned char)cpi->pb.FrameQIndex;
+ SB = SBrow*SBCols + SBcol;
- if(!cpi->MotionCompensation)
- return 0;
-
- /* change the quatization matrix to the one at best Q to compute the
- new error score */
- cpi->MinImprovementForNewMV = (MvThreshTable[QIndex] << 12);
- cpi->InterTripOutThresh = (5000<<12);
- cpi->MVChangeFactor = MVChangeFactorTable[QIndex]; /* 0.9 */
-
- if ( cpi->pb.info.quick_p ) {
- cpi->ExhaustiveSearchThresh = (1000<<12);
- cpi->FourMVThreshold = (2500<<12);
- } else {
- cpi->ExhaustiveSearchThresh = (250<<12);
- cpi->FourMVThreshold = (500<<12);
- }
- cpi->MinImprovementForFourMV = cpi->MinImprovementForNewMV * 4;
-
- if(cpi->MinImprovementForFourMV < (40<<12))
- cpi->MinImprovementForFourMV = (40<<12);
-
- cpi->FourMvChangeFactor = 8; /* cpi->MVChangeFactor - 0.05; */
-
- /* decide what block type and motion vectors to use on all of the frames */
- for ( SBrow=0; SBrow<SBRows; SBrow++ ) {
- for ( SBcol=0; SBcol<SBCols; SBcol++ ) {
/* Check its four Macro-Blocks */
for ( MB=0; MB<4; MB++ ) {
- /* There may be MB's lying out of frame which must be
- ignored. For these MB's Top left block will have a negative
- Fragment Index. */
- if ( QuadMapToMBTopLeft(cpi->pb.BlockMap,SB,MB) < 0 ) continue;
+ /* There may be MB's lying out of frame which must be
+ ignored. For these MB's Top left block will have a negative
+ Fragment Index. */
+ if ( QuadMapToMBTopLeft(cpi->pb.BlockMap,SB,MB) < 0 ) continue;
- /* Is the current macro block coded (in part or in whole) */
- MBCodedFlag = 0;
- for ( B=0; B<4; B++ ) {
- YFragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B );
+ /* Is the current macro block coded (in part or in whole) */
+ MBCodedFlag = 0;
+ for ( B=0; B<4; B++ ) {
+ YFragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B );
- /* Does Block lie in frame: */
- if ( YFragIndex >= 0 ) {
- /* In Frame: Is it coded: */
- if ( cpi->pb.display_fragments[YFragIndex] ) {
- MBCodedFlag = 1;
- break;
- }
- } else
- MBCodedFlag = 0;
- }
+ /* Does Block lie in frame: */
+ if ( YFragIndex >= 0 ) {
+ /* In Frame: Is it coded: */
+ if ( cpi->pb.display_fragments[YFragIndex] ) {
+ MBCodedFlag = 1;
+ break;
+ }
+ } else
+ MBCodedFlag = 0;
+ }
- /* This one isn't coded go to the next one */
- if(!MBCodedFlag) continue;
+ /* This one isn't coded go to the next one */
+ if(!MBCodedFlag) continue;
- /* Calculate U and V FragIndex from YFragIndex */
- YFragIndex = QuadMapToMBTopLeft(cpi->pb.BlockMap, SB,MB);
- UVRow = (YFragIndex / (cpi->pb.HFragments * 2));
- UVColumn = (YFragIndex % cpi->pb.HFragments) / 2;
- UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn;
- UFragIndex = cpi->pb.YPlaneFragments + UVFragOffset;
- VFragIndex = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments +
- UVFragOffset;
+ /* Calculate U and V FragIndex from YFragIndex */
+ YFragIndex = QuadMapToMBTopLeft(cpi->pb.BlockMap, SB,MB);
+ UVRow = (YFragIndex / (cpi->pb.HFragments * 2));
+ UVColumn = (YFragIndex % cpi->pb.HFragments) / 2;
+ UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn;
+ UFragIndex = cpi->pb.YPlaneFragments + UVFragOffset;
+ VFragIndex = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments +
+ UVFragOffset;
- /**************************************************************
+ /**************************************************************
Find the block choice with the lowest error
NOTE THAT if U or V is coded but no Y from a macro block then
@@ -1171,147 +1169,149 @@
state to which the mode data structure is initialised in
encoder and decoder at the start of each frame. */
- BestError = HUGE_ERROR;
+ BestError = HUGE_ERROR;
- /* Look at the intra coding error. */
- MBIntraError = GetMBIntraError( cpi, YFragIndex, PixelsPerLine );
- BestError = (BestError > MBIntraError) ? MBIntraError : BestError;
+ /* Look at the intra coding error. */
+ MBIntraError = GetMBIntraError( cpi, YFragIndex, PixelsPerLine );
+ BestError = (BestError > MBIntraError) ? MBIntraError : BestError;
- /* Get the golden frame error */
- MBGFError = GetMBInterError( cpi, cpi->ConvDestBuffer,
- cpi->pb.GoldenFrame, YFragIndex,
- 0, 0, PixelsPerLine );
- BestError = (BestError > MBGFError) ? MBGFError : BestError;
+ /* Get the golden frame error */
+ MBGFError = GetMBInterError( cpi, cpi->ConvDestBuffer,
+ cpi->pb.GoldenFrame, YFragIndex,
+ 0, 0, PixelsPerLine );
+ BestError = (BestError > MBGFError) ? MBGFError : BestError;
- /* Calculate the 0,0 case. */
- MBInterError = GetMBInterError( cpi, cpi->ConvDestBuffer,
- cpi->pb.LastFrameRecon,
- YFragIndex, 0, 0, PixelsPerLine );
- BestError = (BestError > MBInterError) ? MBInterError : BestError;
+ /* Calculate the 0,0 case. */
+ MBInterError = GetMBInterError( cpi, cpi->ConvDestBuffer,
+ cpi->pb.LastFrameRecon,
+ YFragIndex, 0, 0, PixelsPerLine );
+ BestError = (BestError > MBInterError) ? MBInterError : BestError;
- /* Measure error for last MV */
- MBLastInterError = GetMBInterError( cpi, cpi->ConvDestBuffer,
- cpi->pb.LastFrameRecon,
- YFragIndex, LastInterMVect.x,
- LastInterMVect.y, PixelsPerLine );
- BestError = (BestError > MBLastInterError) ?
- MBLastInterError : BestError;
+ /* Measure error for last MV */
+ MBLastInterError = GetMBInterError( cpi, cpi->ConvDestBuffer,
+ cpi->pb.LastFrameRecon,
+ YFragIndex, LastInterMVect.x,
+ LastInterMVect.y, PixelsPerLine );
+ BestError = (BestError > MBLastInterError) ?
+ MBLastInterError : BestError;
- /* Measure error for prior last MV */
- MBPriorLastInterError = GetMBInterError( cpi, cpi->ConvDestBuffer,
- cpi->pb.LastFrameRecon,
- YFragIndex,
- PriorLastInterMVect.x,
- PriorLastInterMVect.y,
- PixelsPerLine );
- BestError = (BestError > MBPriorLastInterError) ?
- MBPriorLastInterError : BestError;
+ /* Measure error for prior last MV */
+ MBPriorLastInterError = GetMBInterError( cpi, cpi->ConvDestBuffer,
+ cpi->pb.LastFrameRecon,
+ YFragIndex,
+ PriorLastInterMVect.x,
+ PriorLastInterMVect.y,
+ PixelsPerLine );
+ BestError = (BestError > MBPriorLastInterError) ?
+ MBPriorLastInterError : BestError;
- /* Temporarily force usage of no motionvector blocks */
- MBInterMVError = HUGE_ERROR;
- InterMVect.x = 0; /* Set 0,0 motion vector */
- InterMVect.y = 0;
+ /* Temporarily force usage of no motionvector blocks */
+ MBInterMVError = HUGE_ERROR;
+ InterMVect.x = 0; /* Set 0,0 motion vector */
+ InterMVect.y = 0;
- /* If the best error is above the required threshold search
- for a new inter MV */
- if ( BestError > cpi->MinImprovementForNewMV ) {
- /* Use a mix of heirachical and exhaustive searches for
- quick mode. */
- if ( cpi->pb.info.quick_p ) {
- MBInterMVError = GetMBMVInterError( cpi, cpi->pb.LastFrameRecon,
- YFragIndex, PixelsPerLine,
- cpi->MVPixelOffsetY,
- &InterMVect );
+ /* If the best error is above the required threshold search
+ for a new inter MV */
+ if ( BestError > cpi->MinImprovementForNewMV ) {
+ /* Use a mix of heirachical and exhaustive searches for
+ quick mode. */
+ if ( cpi->pb.info.quick_p ) {
+ MBInterMVError = GetMBMVInterError( cpi, cpi->pb.LastFrameRecon,
+ YFragIndex, PixelsPerLine,
+ cpi->MVPixelOffsetY,
+ &InterMVect );
- /* If we still do not have a good match try an exhaustive
- MBMV search */
- if ( (MBInterMVError > cpi->ExhaustiveSearchThresh) &&
- (BestError > cpi->ExhaustiveSearchThresh) ) {
+ /* If we still do not have a good match try an exhaustive
+ MBMV search */
+ if ( (MBInterMVError > cpi->ExhaustiveSearchThresh) &&
+ (BestError > cpi->ExhaustiveSearchThresh) ) {
- MBInterMVExError =
- GetMBMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
- YFragIndex, PixelsPerLine,
- &InterMVectEx );
+ MBInterMVExError =
+ GetMBMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
+ YFragIndex, PixelsPerLine,
+ &InterMVectEx );
- /* Is the Variance measure for the EX search
- better... If so then use it. */
- if ( MBInterMVExError < MBInterMVError ) {
- MBInterMVError = MBInterMVExError;
- InterMVect.x = InterMVectEx.x;
- InterMVect.y = InterMVectEx.y;
- }
- }
- }else{
- /* Use an exhaustive search */
- MBInterMVError =
- GetMBMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
- YFragIndex, PixelsPerLine,
- &InterMVect );
- }
+ /* Is the Variance measure for the EX search
+ better... If so then use it. */
+ if ( MBInterMVExError < MBInterMVError ) {
+ MBInterMVError = MBInterMVExError;
+ InterMVect.x = InterMVectEx.x;
+ InterMVect.y = InterMVectEx.y;
+ }
+ }
+ }else{
+ /* Use an exhaustive search */
+ MBInterMVError =
+ GetMBMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
+ YFragIndex, PixelsPerLine,
+ &InterMVect );
+ }
- /* Is the improvement, if any, good enough to justify a new MV */
- if ( (16 * MBInterMVError < (BestError * cpi->MVChangeFactor)) &&
- ((MBInterMVError + cpi->MinImprovementForNewMV) < BestError) ){
- BestError = MBInterMVError;
- }
+ /* Is the improvement, if any, good enough to justify a new MV */
+ if ( (16 * MBInterMVError < (BestError * cpi->MVChangeFactor)) &&
+ ((MBInterMVError + cpi->MinImprovementForNewMV) < BestError) ){
+ BestError = MBInterMVError;
+ }
- }
+ }
- /* If the best error is still above the required threshold
- search for a golden frame MV */
- MBGF_MVError = HUGE_ERROR;
- GFMVect.x = 0; /* Set 0,0 motion vector */
- GFMVect.y = 0;
- if ( BestError > cpi->MinImprovementForNewMV ) {
- /* Do an MV search in the golden reference frame */
- MBGF_MVError = GetMBMVInterError( cpi, cpi->pb.GoldenFrame,
- YFragIndex, PixelsPerLine,
- cpi->MVPixelOffsetY, &GFMVect );
+ /* If the best error is still above the required threshold
+ search for a golden frame MV */
+ MBGF_MVError = HUGE_ERROR;
+ GFMVect.x = 0; /* Set 0,0 motion vector */
+ GFMVect.y = 0;
- /* Measure error for last GFMV */
- LastMBGF_MVError = GetMBInterError( cpi, cpi->ConvDestBuffer,
- cpi->pb.GoldenFrame,
- YFragIndex, LastGFMVect.x,
- LastGFMVect.y, PixelsPerLine );
+ if ( BestError > cpi->MinImprovementForNewMV ) {
+ /* Do an MV search in the golden reference frame */
+ MBGF_MVError = GetMBMVInterError( cpi, cpi->pb.GoldenFrame,
+ YFragIndex, PixelsPerLine,
+ cpi->MVPixelOffsetY, &GFMVect );
- /* Check against last GF motion vector and reset if the
- search has thrown a worse result. */
- if ( LastMBGF_MVError < MBGF_MVError ) {
- GFMVect.x = LastGFMVect.x;
- GFMVect.y = LastGFMVect.y;
- MBGF_MVError = LastMBGF_MVError;
- }else{
- LastGFMVect.x = GFMVect.x;
- LastGFMVect.y = GFMVect.y;
- }
+ /* Measure error for last GFMV */
+ LastMBGF_MVError = GetMBInterError( cpi, cpi->ConvDestBuffer,
+ cpi->pb.GoldenFrame,
+ YFragIndex, LastGFMVect.x,
+ LastGFMVect.y, PixelsPerLine );
- /* Is the improvement, if any, good enough to justify a new MV */
- if ( (16 * MBGF_MVError < (BestError * cpi->MVChangeFactor)) &&
- ((MBGF_MVError + cpi->MinImprovementForNewMV) < BestError) ) {
- BestError = MBGF_MVError;
- }
- }
+ /* Check against last GF motion vector and reset if the
+ search has thrown a worse result. */
+ if ( LastMBGF_MVError < MBGF_MVError ) {
+ GFMVect.x = LastGFMVect.x;
+ GFMVect.y = LastGFMVect.y;
+ MBGF_MVError = LastMBGF_MVError;
+ }else{
+ LastGFMVect.x = GFMVect.x;
+ LastGFMVect.y = GFMVect.y;
+ }
- /* Finally... If the best error is still to high then consider
- the 4MV mode */
- MBInterFOURMVError = HUGE_ERROR;
- if ( BestError > cpi->FourMVThreshold ) {
- /* Get the 4MV error. */
- MBInterFOURMVError =
- GetFOURMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
- YFragIndex, PixelsPerLine, FourMVect );
+ /* Is the improvement, if any, good enough to justify a new MV */
+ if ( (16 * MBGF_MVError < (BestError * cpi->MVChangeFactor)) &&
+ ((MBGF_MVError + cpi->MinImprovementForNewMV) < BestError) ) {
+ BestError = MBGF_MVError;
+ }
+ }
- /* If the improvement is great enough then use the four MV mode */
- if ( ((MBInterFOURMVError + cpi->MinImprovementForFourMV) <
- BestError) && (16 * MBInterFOURMVError <
- (BestError * cpi->FourMvChangeFactor))) {
- BestError = MBInterFOURMVError;
- }
- }
- /********************************************************
+ /* Finally... If the best error is still to high then consider
+ the 4MV mode */
+ MBInterFOURMVError = HUGE_ERROR;
+ if ( BestError > cpi->FourMVThreshold ) {
+ /* Get the 4MV error. */
+ MBInterFOURMVError =
+ GetFOURMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
+ YFragIndex, PixelsPerLine, FourMVect );
+
+ /* If the improvement is great enough then use the four MV mode */
+ if ( ((MBInterFOURMVError + cpi->MinImprovementForFourMV) <
+ BestError) && (16 * MBInterFOURMVError <
+ (BestError * cpi->FourMvChangeFactor))) {
+ BestError = MBInterFOURMVError;
+ }
+ }
+
+ /********************************************************
end finding the best error
*******************************************************
@@ -1320,129 +1320,243 @@
Over-ride and force intra if error high and Intra error similar
Now choose a mode based on lowest error (with bias towards no MV) */
- if ( (BestError > cpi->InterTripOutThresh) &&
- (10 * BestError > MBIntraError * 7 ) ) {
- cpi->MBCodingMode = CODE_INTRA;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&ZeroVect);
- } else if ( BestError == MBInterError ) {
- cpi->MBCodingMode = CODE_INTER_NO_MV;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&ZeroVect);
- } else if ( BestError == MBGFError ) {
- cpi->MBCodingMode = CODE_USING_GOLDEN;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&ZeroVect);
- } else if ( BestError == MBLastInterError ) {
- cpi->MBCodingMode = CODE_INTER_LAST_MV;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&LastInterMVect);
- } else if ( BestError == MBPriorLastInterError ) {
- cpi->MBCodingMode = CODE_INTER_PRIOR_LAST;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&PriorLastInterMVect);
+ if ( (BestError > cpi->InterTripOutThresh) &&
+ (10 * BestError > MBIntraError * 7 ) ) {
+ SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
+ VFragIndex,&ZeroVect, CODE_INTRA);
+ } else if ( BestError == MBInterError ) {
+ SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
+ VFragIndex,&ZeroVect, CODE_INTER_NO_MV);
+ } else if ( BestError == MBGFError ) {
+ SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
+ VFragIndex,&ZeroVect, CODE_USING_GOLDEN);
+ } else if ( BestError == MBLastInterError ) {
+ SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
+ VFragIndex,&LastInterMVect, CODE_INTER_LAST_MV);
+ } else if ( BestError == MBPriorLastInterError ) {
+ MOTION_VECTOR TmpMVect; /* Temporary MV storage */
- /* Swap the prior and last MV cases over */
- TmpMVect.x = PriorLastInterMVect.x;
- TmpMVect.y = PriorLastInterMVect.y;
- PriorLastInterMVect.x = LastInterMVect.x;
- PriorLastInterMVect.y = LastInterMVect.y;
- LastInterMVect.x = TmpMVect.x;
- LastInterMVect.y = TmpMVect.y;
+ SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
+ VFragIndex,&PriorLastInterMVect, CODE_INTER_PRIOR_LAST);
- } else if ( BestError == MBInterMVError ) {
+ /* Swap the prior and last MV cases over */
+ TmpMVect.x = PriorLastInterMVect.x;
+ TmpMVect.y = PriorLastInterMVect.y;
+ PriorLastInterMVect.x = LastInterMVect.x;
+ PriorLastInterMVect.y = LastInterMVect.y;
+ LastInterMVect.x = TmpMVect.x;
+ LastInterMVect.y = TmpMVect.y;
- cpi->MBCodingMode = CODE_INTER_PLUS_MV;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&InterMVect);
+ } else if ( BestError == MBInterMVError ) {
- /* Update Prior last mv with last mv */
- PriorLastInterMVect.x = LastInterMVect.x;
- PriorLastInterMVect.y = LastInterMVect.y;
+ SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
+ VFragIndex,&InterMVect, CODE_INTER_PLUS_MV);
- /* Note last inter MV for future use */
- LastInterMVect.x = InterMVect.x;
- LastInterMVect.y = InterMVect.y;
+ /* Update Prior last mv with last mv */
+ PriorLastInterMVect.x = LastInterMVect.x;
+ PriorLastInterMVect.y = LastInterMVect.y;
- AddMotionVector( cpi, &InterMVect);
+ /* Note last inter MV for future use */
+ LastInterMVect.x = InterMVect.x;
+ LastInterMVect.y = InterMVect.y;
- } else if ( BestError == MBGF_MVError ) {
+ AddMotionVector(mvList, SB, MB, 0,&InterMVect);
- cpi->MBCodingMode = CODE_GOLDEN_MV;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&GFMVect);
+ } else if ( BestError == MBGF_MVError ) {
- /* Note last inter GF MV for future use */
- LastGFMVect.x = GFMVect.x;
- LastGFMVect.y = GFMVect.y;
+ SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
+ VFragIndex,&GFMVect, CODE_GOLDEN_MV);
- AddMotionVector( cpi, &GFMVect);
- } else if ( BestError == MBInterFOURMVError ) {
- cpi->MBCodingMode = CODE_INTER_FOURMV;
+ /* Note last inter GF MV for future use */
+ LastGFMVect.x = GFMVect.x;
+ LastGFMVect.y = GFMVect.y;
- /* Calculate the UV vectors as the average of the Y plane ones. */
- /* First .x component */
- FourMVect[4].x = FourMVect[0].x + FourMVect[1].x +
- FourMVect[2].x + FourMVect[3].x;
- if ( FourMVect[4].x >= 0 )
- FourMVect[4].x = (FourMVect[4].x + 2) / 4;
- else
- FourMVect[4].x = (FourMVect[4].x - 2) / 4;
- FourMVect[5].x = FourMVect[4].x;
+ AddMotionVector(mvList, SB, MB, 0, &GFMVect);
- /* Then .y component */
- FourMVect[4].y = FourMVect[0].y + FourMVect[1].y +
- FourMVect[2].y + FourMVect[3].y;
- if ( FourMVect[4].y >= 0 )
- FourMVect[4].y = (FourMVect[4].y + 2) / 4;
- else
- FourMVect[4].y = (FourMVect[4].y - 2) / 4;
- FourMVect[5].y = FourMVect[4].y;
+ } else if ( BestError == MBInterFOURMVError ) {
- SetFragMotionVectorAndMode(cpi, YFragIndex, &FourMVect[0]);
- SetFragMotionVectorAndMode(cpi, YFragIndex + 1, &FourMVect[1]);
- SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments,
- &FourMVect[2]);
- SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments + 1,
- &FourMVect[3]);
- SetFragMotionVectorAndMode(cpi, UFragIndex, &FourMVect[4]);
- SetFragMotionVectorAndMode(cpi, VFragIndex, &FourMVect[5]);
+ /* Calculate the UV vectors as the average of the Y plane ones. */
+ /* First .x component */
+ FourMVect[4].x = FourMVect[0].x + FourMVect[1].x +
+ FourMVect[2].x + FourMVect[3].x;
+ if ( FourMVect[4].x >= 0 )
+ FourMVect[4].x = (FourMVect[4].x + 2) / 4;
+ else
+ FourMVect[4].x = (FourMVect[4].x - 2) / 4;
+ FourMVect[5].x = FourMVect[4].x;
- /* Note the four MVs values for current macro-block. */
- AddMotionVector( cpi, &FourMVect[0]);
- AddMotionVector( cpi, &FourMVect[1]);
- AddMotionVector( cpi, &FourMVect[2]);
- AddMotionVector( cpi, &FourMVect[3]);
+ /* Then .y component */
+ FourMVect[4].y = FourMVect[0].y + FourMVect[1].y +
+ FourMVect[2].y + FourMVect[3].y;
+ if ( FourMVect[4].y >= 0 )
+ FourMVect[4].y = (FourMVect[4].y + 2) / 4;
+ else
+ FourMVect[4].y = (FourMVect[4].y - 2) / 4;
+ FourMVect[5].y = FourMVect[4].y;
- /* Update Prior last mv with last mv */
- PriorLastInterMVect.x = LastInterMVect.x;
- PriorLastInterMVect.y = LastInterMVect.y;
+ SetFragMotionVectorAndMode(cpi, YFragIndex, &FourMVect[0], CODE_INTER_FOURMV);
+ SetFragMotionVectorAndMode(cpi, YFragIndex + 1, &FourMVect[1], CODE_INTER_FOURMV);
+ SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments,
+ &FourMVect[2], CODE_INTER_FOURMV);
+ SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments + 1,
+ &FourMVect[3], CODE_INTER_FOURMV);
+ SetFragMotionVectorAndMode(cpi, UFragIndex, &FourMVect[4], CODE_INTER_FOURMV);
+ SetFragMotionVectorAndMode(cpi, VFragIndex, &FourMVect[5], CODE_INTER_FOURMV);
- /* Note last inter MV for future use */
- LastInterMVect.x = FourMVect[3].x;
- LastInterMVect.y = FourMVect[3].y;
+ /* Note the four MVs values for current macro-block. */
+ AddMotionVector(mvList, SB, MB, 0, &FourMVect[0]);
+ AddMotionVector(mvList, SB, MB, 1, &FourMVect[1]);
+ AddMotionVector(mvList, SB, MB, 2, &FourMVect[2]);
+ AddMotionVector(mvList, SB, MB, 3, &FourMVect[3]);
- } else {
- cpi->MBCodingMode = CODE_INTRA;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&ZeroVect);
- }
+ /* Update Prior last mv with last mv */
+ PriorLastInterMVect.x = LastInterMVect.x;
+ PriorLastInterMVect.y = LastInterMVect.y;
+ /* Note last inter MV for future use */
+ LastInterMVect.x = FourMVect[3].x;
+ LastInterMVect.y = FourMVect[3].y;
- /* setting up mode specific block types
- *******************************************************/
+ } else {
- *InterError += (BestError>>8);
- *IntraError += (MBIntraError>>8);
+ SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
+ VFragIndex,&ZeroVect, CODE_INTRA);
+ }
+ /* setting up mode specific block types
+ *******************************************************/
+
+ *InterError += (BestError>>8);
+ *IntraError += (MBIntraError>>8);
}
- SB++;
+ }
+ }
+}
+
+/* Chooses coding modes and motion vectors for the current frame by
+   dividing the super-block rows among worker threads, each of which runs
+   ThreadPickModes() on a contiguous run of rows.  The calling thread
+   processes the last run itself, so at most (number of threads - 1)
+   threads are created.
+
+   cpi            Encoder instance.  cpi->numThreads selects the number of
+                  workers (values below 1 are treated as 1).  On return
+                  cpi->MVList / cpi->MvListCount hold the motion vectors
+                  recorded by the workers, in ascending fragment index
+                  order.
+   SBRows         Number of super-block rows to distribute.
+   SBCols         Passed through unchanged to every worker.
+   PixelsPerLine  Passed through unchanged to every worker.
+   InterError     Out: sum of the per-worker inter error accumulators.
+   IntraError     Out: sum of the per-worker intra error accumulators.
+
+   Always returns 0.  When motion compensation is disabled, or when the
+   scratch buffers cannot be allocated, the function returns with both
+   error outputs and cpi->MvListCount set to 0 and no search performed
+   (the interface offers no way to report the allocation failure). */
+ogg_uint32_t PickModes(CP_INSTANCE *cpi,
+                       ogg_uint32_t SBRows, ogg_uint32_t SBCols,
+                       ogg_uint32_t PixelsPerLine,
+                       ogg_uint32_t *InterError, ogg_uint32_t *IntraError) {
+  /* Number of Threads.  Clamped to at least 1: zero would divide by zero
+     when balancing the rows, and there must always be one slice for the
+     calling thread to process. */
+  const int num_threads = cpi->numThreads < 1 ? 1 : cpi->numThreads;
+
+  /* Scratch buffers.  They are allocated only once we know the search
+     will actually run, and all released at the cleanup label. */
+  ogg_uint32_t *InterErrors = NULL;   /* per-thread inter error sums */
+  ogg_uint32_t *IntraErrors = NULL;   /* per-thread intra error sums */
+  THREAD_MV_LIST *mvList = NULL;      /* one slot per fragment */
+  int *thread_SBRows = NULL;          /* SB rows assigned to each thread */
+  int *thread_started = NULL;         /* non-zero if a thread must be joined */
+  pthread_t *threads = NULL;
+  THREAD_PARAM *p = NULL;
+
+  ogg_uint32_t SBrow; /* Super-Block row number */
+  int thread_id;
+
+  unsigned char QIndex;
+
+  /* initialize error scores */
+  *InterError = 0;
+  *IntraError = 0;
+
+  /* clear down the default motion vector. */
+  cpi->MvListCount = 0;
+
+  QIndex = (unsigned char)cpi->pb.FrameQIndex;
+
+  /* Nothing has been allocated yet, so this early exit cannot leak. */
+  if(!cpi->MotionCompensation)
+    return 0;
+
+  /* change the quantization matrix to the one at best Q to compute the
+     new error score */
+  cpi->MinImprovementForNewMV = (MvThreshTable[QIndex] << 12);
+  cpi->InterTripOutThresh = (5000<<12);
+  cpi->MVChangeFactor = MVChangeFactorTable[QIndex]; /* 0.9 */
+
+  if ( cpi->pb.info.quick_p ) {
+    cpi->ExhaustiveSearchThresh = (1000<<12);
+    cpi->FourMVThreshold = (2500<<12);
+  } else {
+    cpi->ExhaustiveSearchThresh = (250<<12);
+    cpi->FourMVThreshold = (500<<12);
+  }
+  cpi->MinImprovementForFourMV = cpi->MinImprovementForNewMV * 4;
+
+  if(cpi->MinImprovementForFourMV < (40<<12))
+    cpi->MinImprovementForFourMV = (40<<12);
+
+  cpi->FourMvChangeFactor = 8; /* cpi->MVChangeFactor - 0.05; */
+
+  /* calloc() hands back zeroed storage, which the accumulators, the
+     per-fragment valid flags and the started flags all rely on. */
+  InterErrors = calloc(num_threads, sizeof(*InterErrors));
+  IntraErrors = calloc(num_threads, sizeof(*IntraErrors));
+  mvList = calloc(cpi->pb.UnitFragments, sizeof(*mvList));
+  thread_SBRows = malloc(num_threads * sizeof(*thread_SBRows));
+  thread_started = calloc(num_threads, sizeof(*thread_started));
+  threads = malloc(num_threads * sizeof(*threads));
+  p = malloc(num_threads * sizeof(*p));
+
+  if (InterErrors == NULL || IntraErrors == NULL || mvList == NULL ||
+      thread_SBRows == NULL || thread_started == NULL ||
+      threads == NULL || p == NULL)
+    goto cleanup;
+
+  { /* Balance the number of SB rows per thread */
+    const int rows_per_thread = SBRows/num_threads;
+    const int fractional_part = SBRows - (rows_per_thread) * num_threads;
+    for (thread_id=0; thread_id < num_threads; thread_id++) {
+      /* The first fractional_part threads absorb the remainder rows. */
+      if (thread_id < fractional_part)
+        thread_SBRows[thread_id] = rows_per_thread + 1;
+      else
+        thread_SBRows[thread_id] = rows_per_thread;
     }
   }
+  SBrow = 0;
+  /* decide what block type and motion vectors to use on all of the frames */
+  for ( thread_id=0; thread_id<num_threads; thread_id++ ) {
+    p[thread_id].cpi = cpi;
+    p[thread_id].SBStartRow = SBrow;
+    SBrow += thread_SBRows[thread_id];
+    p[thread_id].SBEndRow = SBrow;
+    p[thread_id].SBCols = SBCols;
+    p[thread_id].PixelsPerLine = PixelsPerLine;
+    p[thread_id].mvList = mvList;
+    p[thread_id].InterError = &InterErrors[thread_id];
+    p[thread_id].IntraError = &IntraErrors[thread_id];
+
+    if ( thread_id < num_threads-1 &&
+         pthread_create(&threads[thread_id], NULL, ThreadPickModes,
+                        (void *)(&p[thread_id])) == 0 ) {
+      thread_started[thread_id] = 1;
+    } else {
+      /* Either this is the last slice, which the calling thread always
+         handles, or the thread could not be created.  In both cases do
+         the work here so that no rows are left unprocessed and no
+         never-created thread is joined later. */
+      ThreadPickModes((void *)(&p[thread_id]));
+    }
+  }
+
+  /* Synchronize threads */
+  for (thread_id=0; thread_id<num_threads-1; thread_id++) {
+    if (thread_started[thread_id])
+      pthread_join(threads[thread_id],NULL);
+  }
+
+  for ( thread_id=0; thread_id<num_threads; thread_id++ ) {
+    (*InterError) += InterErrors[thread_id];
+    (*IntraError) += IntraErrors[thread_id];
+  }
+
+  { /* Put the Motion Vectors on the cpi->MVList with the correct fragment order */
+    int fragIndex;
+    for (fragIndex=0; fragIndex < cpi->pb.UnitFragments; fragIndex++) {
+      if( mvList[fragIndex].valid ) {
+        cpi->MVList[cpi->MvListCount].x = mvList[fragIndex].mv.x;
+        cpi->MVList[cpi->MvListCount].y = mvList[fragIndex].mv.y;
+        cpi->MvListCount++;
+      }
+    }
+  }
+
+cleanup:
+  /* free(NULL) is a no-op, so this is safe after a partial allocation. */
+  free(thread_SBRows);
+  free(thread_started);
+  free(p);
+  free(threads);
+  free(InterErrors);
+  free(IntraErrors);
+  free(mvList);
+
   /* Return number of pixels coded */
   return 0;
 }
Modified: branches/theora-multithread/lib/enc/encoder_toplevel.c
===================================================================
--- branches/theora-multithread/lib/enc/encoder_toplevel.c 2007-10-06 20:43:25 UTC (rev 13940)
+++ branches/theora-multithread/lib/enc/encoder_toplevel.c 2007-10-06 22:15:55 UTC (rev 13941)
@@ -938,6 +938,7 @@
cpi->MotionCompensation = 1;
cpi->ThreshMapThreshold = 5;
cpi->MaxConsDroppedFrames = 1;
+ cpi->numThreads = 1;
/* Set encoder flags. */
/* if not AutoKeyframing cpi->ForceKeyFrameEvery = is frequency */
@@ -1454,6 +1455,10 @@
value = 2;
memcpy(buf, &value, sizeof(int));
return 0;
+ case TH_ENCCTL_SET_NUM_THREADS:
+ memcpy(&value, buf, sizeof(int));
+ cpi->numThreads = value;
+ return 0;
default:
return TH_EIMPL;
}
More information about the commits
mailing list