[xiph-commits] r12840 - in experimental/j/theora-mashup: . lib lib/enc lib/enc/x86_32 lib/enc/x86_64
j at svn.xiph.org
j at svn.xiph.org
Mon Apr 9 11:09:02 PDT 2007
Author: j
Date: 2007-04-09 11:08:48 -0700 (Mon, 09 Apr 2007)
New Revision: 12840
Added:
experimental/j/theora-mashup/lib/enc/x86_32/dct_decode_mmx.c
experimental/j/theora-mashup/lib/enc/x86_32/idct_mmx.c
experimental/j/theora-mashup/lib/enc/x86_64/dct_decode_mmx.c
experimental/j/theora-mashup/lib/enc/x86_64/idct_mmx.c
Modified:
experimental/j/theora-mashup/configure.ac
experimental/j/theora-mashup/lib/Makefile.am
experimental/j/theora-mashup/lib/enc/codec_internal.h
experimental/j/theora-mashup/lib/enc/dct.c
experimental/j/theora-mashup/lib/enc/dct_decode.c
experimental/j/theora-mashup/lib/enc/dsp.c
experimental/j/theora-mashup/lib/enc/dsp.h
experimental/j/theora-mashup/lib/enc/encoder_idct.c
experimental/j/theora-mashup/lib/enc/encoder_quant.c
experimental/j/theora-mashup/lib/enc/mcomp.c
experimental/j/theora-mashup/lib/enc/reconstruct.c
Log:
merge changes from trunk
Modified: experimental/j/theora-mashup/configure.ac
===================================================================
--- experimental/j/theora-mashup/configure.ac 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/configure.ac 2007-04-09 18:08:48 UTC (rev 12840)
@@ -24,7 +24,7 @@
AC_SUBST(V_LIB_AGE)
dnl Extra linker options (for version script)
-SHLIB_VERSION_ARG=""
+THEORA_LDFLAGS=""
dnl --------------------------------------------------
dnl Check for programs
@@ -115,6 +115,10 @@
cpu_x86_32=yes
cpu_optimization="32 bit x86"
AC_DEFINE([USE_ASM], [], [make use of asm optimization])
+ if test "x$target_vendor" = "xapple"; then
+ THEORA_LDFLAGS="$THEORA_LDFLAGS -Wl,-read_only_relocs,suppress"
+ fi
+
AC_DEFINE([OC_X86ASM], [], [enable x86 assambler optimization])
AM_CONDITIONAL(OC_X86ASM,true)
;;
@@ -146,9 +150,10 @@
*)
;;
esac
+ THEORA_LDFLAGS="$THEORA_LDFLAGS SHLIB_VERSION_ARG"
fi
-AC_SUBST(SHLIB_VERSION_ARG)
+AC_SUBST(THEORA_LDFLAGS)
dnl --------------------------------------------------
dnl Checks for support libraries and headers
Modified: experimental/j/theora-mashup/lib/Makefile.am
===================================================================
--- experimental/j/theora-mashup/lib/Makefile.am 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/Makefile.am 2007-04-09 18:08:48 UTC (rev 12840)
@@ -1,14 +1,17 @@
INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/lib -I$(top_srcdir)/lib/dec -I$(top_srcdir)/lib/enc
EXTRA_DIST = Version_script.in \
+ enc/x86_32/dct_decode_mmx.c \
enc/x86_32/dsp_mmx.c \
enc/x86_32/dsp_mmxext.c \
enc/x86_32/recon_mmx.c \
enc/x86_32/fdct_mmx.c \
+ enc/x86_32/idct_mmx.c \
enc/x86_64/dsp_mmx.c \
enc/x86_64/dsp_mmxext.c \
enc/x86_64/recon_mmx.c \
enc/x86_64/fdct_mmx.c \
+ enc/x86_64/idct_mmx.c \
enc/x86_32_vs/dsp_mmx.c \
enc/x86_32_vs/fdct_mmx.c \
enc/x86_32_vs/recon_mmx.c \
@@ -46,17 +49,21 @@
if CPU_x86_64
enc_arch_dir = enc/x86_64
encoder_arch_sources= \
+ $(arch_dir)/dct_decode_mmx.c \
$(arch_dir)/dsp_mmx.c \
$(arch_dir)/dsp_mmxext.c \
$(arch_dir)/recon_mmx.c \
+ $(arch_dir)/idct_mmx.c \
$(arch_dir)/fdct_mmx.c
else
if CPU_x86_32
arch_dir = enc/x86_32
encoder_arch_sources= \
+ $(arch_dir)/dct_decode_mmx.c \
$(arch_dir)/dsp_mmx.c \
$(arch_dir)/dsp_mmxext.c \
$(arch_dir)/recon_mmx.c \
+ $(arch_dir)/idct_mmx.c \
$(arch_dir)/fdct_mmx.c
endif
endif
@@ -124,7 +131,7 @@
libtheora_la_CFLAGS = $(OGG_CFLAGS)
-libtheora_la_LDFLAGS = -version-info @V_LIB_CURRENT@:@V_LIB_REVISION@:@V_LIB_AGE@ @SHLIB_VERSION_ARG@
+libtheora_la_LDFLAGS = -version-info @V_LIB_CURRENT@:@V_LIB_REVISION@:@V_LIB_AGE@ @THEORA_LDFLAGS@
libtheora_la_LIBADD = $(OGG_LIBS)
debug:
Modified: experimental/j/theora-mashup/lib/enc/codec_internal.h
===================================================================
--- experimental/j/theora-mashup/lib/enc/codec_internal.h 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/codec_internal.h 2007-04-09 18:08:48 UTC (rev 12840)
@@ -453,7 +453,7 @@
/* Loop filter bounding values */
unsigned char LoopFilterLimits[Q_TABLE_SIZE];
- ogg_int32_t FiltBoundingValue[512];
+ ogg_int16_t FiltBoundingValue[256];
/* Dequantiser and rounding tables */
ogg_uint32_t QThreshTable[Q_TABLE_SIZE];
@@ -706,15 +706,6 @@
extern void InitPBInstance(PB_INSTANCE *pbi);
extern void ClearPBInstance(PB_INSTANCE *pbi);
-
-extern void IDctSlow( Q_LIST_ENTRY * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData ) ;
-
-extern void IDct10( Q_LIST_ENTRY * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData );
-
extern void IDct1( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData );
Modified: experimental/j/theora-mashup/lib/enc/dct.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/dct.c 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/dct.c 2007-04-09 18:08:48 UTC (rev 12840)
@@ -257,6 +257,8 @@
void dsp_dct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
{
funcs->fdct_short = fdct_short__c;
+ dsp_dct_decode_init(funcs, cpu_flags);
+ dsp_idct_init(funcs, cpu_flags);
#if defined(USE_ASM)
if (cpu_flags & OC_CPU_X86_MMX) {
dsp_mmx_fdct_init(funcs);
Modified: experimental/j/theora-mashup/lib/enc/dct_decode.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/dct_decode.c 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/dct_decode.c 2007-04-09 18:08:48 UTC (rev 12840)
@@ -18,7 +18,6 @@
#include <stdlib.h>
#include <string.h>
#include "codec_internal.h"
-#include "dsp.h"
#define GOLDEN_FRAME_THRESH_Q 50
@@ -28,7 +27,7 @@
#define PL 1
#define HIGHBITDUPPED(X) (((signed short) X) >> 15)
-/* in-loop filter tables. one of these is used in dct_decode.c */
+/* in-loop filter tables. */
static const unsigned char LoopFilterLimitValuesV1[Q_TABLE_SIZE] = {
30, 25, 20, 20, 15, 15, 14, 14,
@@ -41,27 +40,16 @@
0, 0, 0, 0, 0, 0, 0, 0
};
-static const unsigned char LoopFilterLimitValuesV2[Q_TABLE_SIZE] = {
- 30, 25, 20, 20, 15, 15, 14, 14,
- 13, 13, 12, 12, 11, 11, 10, 10,
- 9, 9, 8, 8, 7, 7, 7, 7,
- 6, 6, 6, 6, 5, 5, 5, 5,
- 4, 4, 4, 4, 3, 3, 3, 3,
- 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 1, 1, 1, 1, 1, 1, 1
-};
-
static const int ModeUsesMC[MAX_MODES] = { 0, 0, 1, 1, 1, 0, 1, 1 };
static void SetupBoundingValueArray_Generic(PB_INSTANCE *pbi,
ogg_int32_t FLimit){
- ogg_int32_t * BoundingValuePtr = pbi->FiltBoundingValue+256;
+ ogg_int16_t * BoundingValuePtr = pbi->FiltBoundingValue+127;
ogg_int32_t i;
/* Set up the bounding value array. */
- memset ( pbi->FiltBoundingValue, 0, (512*sizeof(*pbi->FiltBoundingValue)) );
+ memset ( pbi->FiltBoundingValue, 0, (256*sizeof(*pbi->FiltBoundingValue)) );
for ( i = 0; i < FLimit; i++ ){
BoundingValuePtr[-i-FLimit] = (-FLimit+i);
BoundingValuePtr[-i] = -i;
@@ -107,8 +95,6 @@
void SetupLoopFilter(PB_INSTANCE *pbi){
ogg_int32_t FLimit;
- /* nb: this was using the V2 values rather than V1
- we think is was a mistake; the results were not used */
FLimit = pbi->LoopFilterLimits[pbi->FrameQIndex];
SetupBoundingValueArray_Generic(pbi, FLimit);
}
@@ -137,11 +123,14 @@
case 0:case 1:
IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
break;
- case 2: case 3:case 4:case 5:case 6:case 7:case 8: case 9:case 10:
- IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ case 2: case 3:
+ dsp_IDct3(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
break;
+ case 4:case 5:case 6:case 7:case 8: case 9:case 10:
+ dsp_IDct10(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ break;
default:
- IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ dsp_IDctSlow(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
}
/* Convert fragment number to a pixel offset in a reconstruction buffer. */
@@ -217,11 +206,14 @@
case 0:case 1:
IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
break;
- case 2: case 3:case 4:case 5:case 6:case 7:case 8: case 9:case 10:
- IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ case 2: case 3:
+ dsp_IDct3(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
break;
+ case 4:case 5:case 6:case 7:case 8: case 9:case 10:
+ dsp_IDct10(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ break;
default:
- IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+ dsp_IDctSlow(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
}
/* Convert fragment number to a pixel offset in a reconstruction buffer. */
@@ -283,7 +275,7 @@
/* Reconstruct the pixel dats from the reference frame and change data
(no half pixel in this case as the two references were the same. */
dsp_recon_inter8x8 (pbi->dsp,
- &pbi->ThisFrameRecon[ReconPixelIndex],
+ &pbi->ThisFrameRecon[ReconPixelIndex],
LastFrameRecPtr, pbi->ReconDataBuffer,
ReconPixelsPerLine);
}else{
@@ -673,9 +665,9 @@
}
}
-static void FilterHoriz(unsigned char * PixelPtr,
+static void FilterHoriz__c(unsigned char * PixelPtr,
ogg_int32_t LineLength,
- ogg_int32_t *BoundingValuePtr){
+ ogg_int16_t *BoundingValuePtr){
ogg_int32_t j;
ogg_int32_t FiltVal;
@@ -695,18 +687,16 @@
}
}
-static void FilterVert(unsigned char * PixelPtr,
+static void FilterVert__c(unsigned char * PixelPtr,
ogg_int32_t LineLength,
- ogg_int32_t *BoundingValuePtr){
+ ogg_int16_t *BoundingValuePtr){
ogg_int32_t j;
ogg_int32_t FiltVal;
-
+ PixelPtr -= 2*LineLength;
/* the math was correct, but negative array indicies are forbidden
by ANSI/C99 and will break optimization on several modern
compilers */
- PixelPtr -= 2*LineLength;
-
for ( j = 0; j < 8; j++ ) {
FiltVal = ( (ogg_int32_t)PixelPtr[0] ) -
( (ogg_int32_t)PixelPtr[LineLength] * 3 ) +
@@ -725,7 +715,7 @@
void LoopFilter(PB_INSTANCE *pbi){
ogg_int32_t i;
- ogg_int32_t * BoundingValuePtr=pbi->FiltBoundingValue+256;
+ ogg_int16_t * BoundingValuePtr=pbi->FiltBoundingValue+127;
int FragsAcross=pbi->HFragments;
int FromFragment,ToFragment;
int FragsDown = pbi->VFragments;
@@ -790,14 +780,14 @@
/* Filter right hand border only if the block to the right is
not coded */
if ( !pbi->display_fragments[ i + 1 ] ){
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i]+6,
LineLength,BoundingValuePtr);
}
/* Bottom done if next row set */
if( !pbi->display_fragments[ i + LineFragments] ){
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i+LineFragments],
LineLength, BoundingValuePtr);
}
@@ -809,21 +799,21 @@
for ( n = 1 ; n < FragsAcross - 1 ; n++, i++) {
if( pbi->display_fragments[i]){
/* Filter Left edge always */
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i]-2,
LineLength, BoundingValuePtr);
/* Filter right hand border only if the block to the right is
not coded */
if ( !pbi->display_fragments[ i + 1 ] ){
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i]+6,
LineLength, BoundingValuePtr);
}
/* Bottom done if next row set */
if( !pbi->display_fragments[ i + LineFragments] ){
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i + LineFragments],
LineLength, BoundingValuePtr);
}
@@ -835,13 +825,13 @@
/* Last Column */
if( pbi->display_fragments[i]){
/* Filter Left edge always */
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i] - 2 ,
LineLength, BoundingValuePtr);
/* Bottom done if next row set */
if( !pbi->display_fragments[ i + LineFragments] ){
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i + LineFragments],
LineLength, BoundingValuePtr);
}
@@ -859,21 +849,21 @@
all fragments are intra */
if( pbi->display_fragments[i]){
/* TopRow is always done */
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i],
LineLength, BoundingValuePtr);
/* Filter right hand border only if the block to the right is
not coded */
if ( !pbi->display_fragments[ i + 1 ] ){
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i] + 6,
LineLength, BoundingValuePtr);
}
/* Bottom done if next row set */
if( !pbi->display_fragments[ i + LineFragments] ){
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i + LineFragments],
LineLength, BoundingValuePtr);
}
@@ -885,26 +875,26 @@
for ( n = 1 ; n < FragsAcross - 1 ; n++, i++){
if( pbi->display_fragments[i]){
/* Filter Left edge always */
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i] - 2,
LineLength, BoundingValuePtr);
/* TopRow is always done */
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i],
LineLength, BoundingValuePtr);
/* Filter right hand border only if the block to the right
is not coded */
if ( !pbi->display_fragments[ i + 1 ] ){
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i] + 6,
LineLength, BoundingValuePtr);
}
/* Bottom done if next row set */
if( !pbi->display_fragments[ i + LineFragments] ){
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i + LineFragments],
LineLength, BoundingValuePtr);
}
@@ -915,18 +905,18 @@
/* Last Column */
if( pbi->display_fragments[i]){
/* Filter Left edge always*/
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i] - 2,
LineLength, BoundingValuePtr);
/* TopRow is always done */
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i],
LineLength, BoundingValuePtr);
/* Bottom done if next row set */
if( !pbi->display_fragments[ i + LineFragments] ){
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i + LineFragments],
LineLength, BoundingValuePtr);
}
@@ -944,14 +934,14 @@
if( pbi->display_fragments[i]){
/* TopRow is always done */
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i],
LineLength, BoundingValuePtr);
/* Filter right hand border only if the block to the right is
not coded */
if ( !pbi->display_fragments[ i + 1 ] ){
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i] + 6,
LineLength, BoundingValuePtr);
}
@@ -963,19 +953,19 @@
for ( n = 1 ; n < FragsAcross - 1 ; n++, i++){
if( pbi->display_fragments[i]){
/* Filter Left edge always */
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i] - 2,
LineLength, BoundingValuePtr);
/* TopRow is always done */
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i],
LineLength, BoundingValuePtr);
/* Filter right hand border only if the block to the right is
not coded */
if ( !pbi->display_fragments[ i + 1 ] ){
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i] + 6,
LineLength, BoundingValuePtr);
}
@@ -986,12 +976,12 @@
/* Last Column */
if( pbi->display_fragments[i]){
/* Filter Left edge always */
- FilterHoriz(pbi->LastFrameRecon+
+ dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i] - 2,
LineLength, BoundingValuePtr);
/* TopRow is always done */
- FilterVert(pbi->LastFrameRecon+
+ dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
pbi->recon_pixel_index_table[i],
LineLength, BoundingValuePtr);
@@ -1222,3 +1212,14 @@
UpdateUMVBorder(pbi, pbi->GoldenFrame);
}
}
+
+void dsp_dct_decode_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
+{
+ funcs->FilterVert = FilterVert__c;
+ funcs->FilterHoriz = FilterHoriz__c;
+#if defined(USE_ASM)
+ if (cpu_flags & OC_CPU_X86_MMX) {
+ dsp_mmx_dct_decode_init(funcs);
+ }
+#endif
+}
Modified: experimental/j/theora-mashup/lib/enc/dsp.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/dsp.c 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/dsp.c 2007-04-09 18:08:48 UTC (rev 12840)
@@ -16,8 +16,6 @@
********************************************************************/
#include <stdlib.h>
-#include "cpu.h"
-#include "dsp.h"
#include "codec_internal.h"
#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
@@ -404,7 +402,7 @@
{
ogg_uint32_t cpuflags;
- cpuflags = oc_cpu_flags_get();
+ cpuflags = oc_cpu_flags_get ();
dsp_init (funcs);
dsp_recon_init (funcs, cpuflags);
Modified: experimental/j/theora-mashup/lib/enc/dsp.h
===================================================================
--- experimental/j/theora-mashup/lib/enc/dsp.h 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/dsp.h 2007-04-09 18:08:48 UTC (rev 12840)
@@ -18,7 +18,8 @@
#ifndef DSP_H
#define DSP_H
-#include <theora/theora.h>
+#include "theora/theora.h"
+#include "cpu.h"
typedef struct
{
@@ -77,10 +78,27 @@
ogg_uint32_t (*inter8x8_err_xy2)(unsigned char *SrcData, ogg_uint32_t SrcStride,
unsigned char *RefDataPtr1,
unsigned char *RefDataPtr2, ogg_uint32_t RefStride);
+
+ void (*FilterHoriz) (unsigned char * PixelPtr,
+ ogg_int32_t LineLength, ogg_int16_t *BoundingValuePtr);
+
+ void (*FilterVert) (unsigned char * PixelPtr,
+ ogg_int32_t LineLength, ogg_int16_t *BoundingValuePtr);
+
+ void (*IDctSlow) (ogg_int16_t *InputData,
+ ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
+
+ void (*IDct3) (ogg_int16_t *InputData,
+ ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
+
+ void (*IDct10) (ogg_int16_t *InputData,
+ ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
} DspFunctions;
extern void dsp_dct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
extern void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags);
+extern void dsp_dct_decode_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
+extern void dsp_idct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
void dsp_init(DspFunctions *funcs);
void dsp_static_init(DspFunctions *funcs);
@@ -89,6 +107,8 @@
extern void dsp_mmxext_init(DspFunctions *funcs);
extern void dsp_mmx_fdct_init(DspFunctions *funcs);
extern void dsp_mmx_recon_init(DspFunctions *funcs);
+extern void dsp_mmx_dct_decode_init(DspFunctions *funcs);
+extern void dsp_mmx_idct_init(DspFunctions *funcs);
#endif
#define dsp_save_fpu(funcs) (funcs.save_fpu ())
@@ -132,5 +152,19 @@
#define dsp_inter8x8_err_xy2(funcs,ptr1,str1,ptr2,ptr3,str2) \
(funcs.inter8x8_err_xy2 (ptr1,str1,ptr2,ptr3,str2))
+#define dsp_FilterHoriz(funcs, ptr1, ptr2, ptr3) \
+ (funcs.FilterHoriz(ptr1, ptr2, ptr3))
+#define dsp_FilterVert(funcs, ptr1, ptr2, ptr3) \
+ (funcs.FilterVert(ptr1, ptr2, ptr3))
+
+#define dsp_IDctSlow(funcs, ptr1, ptr2, ptr3) \
+ (funcs.IDctSlow(ptr1, ptr2, ptr3))
+
+#define dsp_IDct3(funcs, ptr1, ptr2, ptr3) \
+ (funcs.IDctSlow(ptr1, ptr2, ptr3))
+
+#define dsp_IDct10(funcs, ptr1, ptr2, ptr3) \
+ (funcs.IDctSlow(ptr1, ptr2, ptr3))
+
#endif /* DSP_H */
Modified: experimental/j/theora-mashup/lib/enc/encoder_idct.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/encoder_idct.c 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/encoder_idct.c 2007-04-09 18:08:48 UTC (rev 12840)
@@ -17,10 +17,10 @@
#include <string.h>
#include "codec_internal.h"
+
#include "quant_lookup.h"
#define IdctAdjustBeforeShift 8
-
/* cos(n*pi/16) or sin(8-n)*pi/16) */
#define xC1S7 64277
#define xC2S6 60547
@@ -31,6 +31,7 @@
#define xC7S1 12785
/* compute the 16 bit signed 1D inverse DCT - spec version */
+/*
static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) {
ogg_int32_t t[8], r;
ogg_int16_t *y = InputData;
@@ -108,6 +109,7 @@
x[7] = r;
}
+*/
static void dequant_slow( ogg_int16_t * dequant_coeffs,
ogg_int16_t * quantized_list,
@@ -119,7 +121,7 @@
-void IDctSlow( Q_LIST_ENTRY * InputData,
+void IDctSlow__c( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ) {
ogg_int32_t IntermediateData[64];
@@ -348,7 +350,7 @@
}
-void IDct10( Q_LIST_ENTRY * InputData,
+void IDct10__c( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ){
ogg_int32_t IntermediateData[64];
@@ -553,3 +555,15 @@
OutputData[loop]=OutD;
}
+
+void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
+{
+ funcs->IDctSlow = IDctSlow__c;
+ funcs->IDct10 = IDct10__c;
+ funcs->IDct3 = IDct10__c;
+#if defined(USE_ASM)
+ if (cpu_flags & OC_CPU_X86_MMX) {
+ dsp_mmx_idct_init(funcs);
+ }
+#endif
+}
Modified: experimental/j/theora-mashup/lib/enc/encoder_quant.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/encoder_quant.c 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/encoder_quant.c 2007-04-09 18:08:48 UTC (rev 12840)
@@ -551,7 +551,7 @@
void select_UV_quantiser ( PB_INSTANCE *pbi ){
pbi->fquant_coeffs = pbi->fp_quant_UV_coeffs;
pbi->fquant_round = pbi->fp_quant_UV_round;
- pbi->fquant_ZbSize = pbi->fp_quant_UV_round;
+ pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_UV;
}
void select_InterUV_quantiser ( PB_INSTANCE *pbi ){
Modified: experimental/j/theora-mashup/lib/enc/mcomp.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/mcomp.c 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/mcomp.c 2007-04-09 18:08:48 UTC (rev 12840)
@@ -17,7 +17,6 @@
#include <stdlib.h>
#include <stdio.h>
-#include "dsp.h"
#include "codec_internal.h"
/* Initialises motion compentsation. */
Modified: experimental/j/theora-mashup/lib/enc/reconstruct.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/reconstruct.c 2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/reconstruct.c 2007-04-09 18:08:48 UTC (rev 12840)
@@ -16,8 +16,6 @@
********************************************************************/
#include "codec_internal.h"
-#include "dsp.h"
-#include "cpu.h"
static void copy8x8__c (unsigned char *src,
unsigned char *dest,
Copied: experimental/j/theora-mashup/lib/enc/x86_32/dct_decode_mmx.c (from rev 12835, trunk/theora/lib/x86_32/dct_decode_mmx.c)
===================================================================
--- trunk/theora/lib/x86_32/dct_decode_mmx.c 2007-04-08 13:04:56 UTC (rev 12835)
+++ experimental/j/theora-mashup/lib/enc/x86_32/dct_decode_mmx.c 2007-04-09 18:08:48 UTC (rev 12840)
@@ -0,0 +1,184 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: dct_decode_mmx.c 12440 2007-02-06 16:36:26Z j $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+
+#include "codec_internal.h"
+
+#if defined(USE_ASM)
+
+static const __attribute__((aligned(8),used)) ogg_int64_t V3= 0x0003000300030003LL;
+static const __attribute__((aligned(8),used)) ogg_int64_t V804= 0x0804080408040804LL;
+
+#if defined(__APPLE__)
+#define MANGLE(x) "_"#x
+#else
+#define MANGLE(x) #x
+#endif
+
+static void FilterHoriz__mmx(unsigned char * PixelPtr,
+ ogg_int32_t LineLength,
+ ogg_int16_t *BoundingValuePtr){
+
+#define OC_LOOP_H_4x4 \
+ __asm__ __volatile__( \
+ "lea (%1,%1,2),%%esi\n" /* esi = ystride*3 */ \
+ "movd (%0), %%mm0\n" /* 0 0 0 0 3 2 1 0 */ \
+ "movd (%0,%1),%%mm1\n" /* 0 0 0 0 7 6 5 4 */ \
+ "movd (%0,%1,2),%%mm2\n" /* 0 0 0 0 b a 9 8 */ \
+ "movd (%0,%%esi),%%mm3\n" /* 0 0 0 0 f e d c */ \
+ "punpcklbw %%mm1,%%mm0\n" /* mm0 = 7 3 6 2 5 1 4 0 */ \
+ "punpcklbw %%mm3,%%mm2\n" /* mm2 = f b e a d 9 c 8 */ \
+ "movq %%mm0,%%mm1\n" /* mm1 = 7 3 6 2 5 1 4 0 */ \
+ "punpcklwd %%mm2,%%mm1\n" /* mm1 = d 9 5 1 c 8 4 0 */ \
+ "punpckhwd %%mm2,%%mm0\n" /* mm0 = f b 7 3 e a 6 2 */ \
+ "pxor %%mm7,%%mm7\n" \
+ "movq %%mm1,%%mm5\n" /* mm5 = d 9 5 1 c 8 4 0 */ \
+ "punpckhbw %%mm7,%%mm5\n" /* mm5 = 0 d 0 9 0 5 0 1 = pix[1]*/ \
+ "punpcklbw %%mm7,%%mm1\n" /* mm1 = 0 c 0 8 0 4 0 0 = pix[0]*/ \
+ "movq %%mm0,%%mm3\n" /* mm3 = f b 7 3 e a 6 2 */ \
+ "punpckhbw %%mm7,%%mm3\n" /* mm3 = 0 f 0 b 0 7 0 3 = pix[3]*/ \
+ "punpcklbw %%mm7,%%mm0\n" /* mm0 = 0 e 0 a 0 6 0 2 = pix[2]*/ \
+ \
+ "psubw %%mm3,%%mm1\n" /* mm1 = pix[0]-pix[3] mm1 - mm3 */ \
+ "movq %%mm0,%%mm7\n" /* mm7 = pix[2]*/ \
+ "psubw %%mm5,%%mm0\n" /* mm0 = pix[2]-pix[1] mm0 - mm5*/ \
+ "PMULLW "MANGLE(V3)",%%mm0\n" /* *3 */ \
+ "paddw %%mm0,%%mm1\n" /* mm1 has f[0] ... f[4]*/ \
+ "paddw "MANGLE(V804)",%%mm1\n"/* add 4 */ /* add 256 after shift */ \
+ "psraw $3,%%mm1\n" /* >>3 */ \
+ " pextrw $0,%%mm1,%%esi\n" /* In MM1 we have 4 f coefs (16bits) */ \
+ " pextrw $1,%%mm1,%%edi\n" /* now perform MM4 = *(_bv+ f) */ \
+ " pinsrw $0,(%2,%%esi,2),%%mm4\n" \
+ " pextrw $2,%%mm1,%%esi\n" \
+ " pinsrw $1,(%2,%%edi,2),%%mm4\n" \
+ " pextrw $3,%%mm1,%%edi\n" \
+ " pinsrw $2,(%2,%%esi,2),%%mm4\n" \
+ " pinsrw $3,(%2,%%edi,2),%%mm4\n" /* new f vals loaded */ \
+ "pxor %%mm0,%%mm0\n" \
+ " paddw %%mm4,%%mm5\n" /*(pix[1]+f);*/ \
+ " psubw %%mm4,%%mm7\n" /* (pix[2]-f); */ \
+ " packuswb %%mm0,%%mm5\n" /* mm5 = x x x x newpix1 */ \
+ " packuswb %%mm0,%%mm7\n" /* mm7 = x x x x newpix2 */ \
+ " punpcklbw %%mm7,%%mm5\n" /* 2 1 2 1 2 1 2 1 */ \
+ " movd %%mm5,%%eax\n" /* eax = newpix21 */ \
+ " movw %%ax,1(%0)\n" \
+ " psrlq $32,%%mm5\n" /* why is so big stall here ? */ \
+ " shrl $16,%%eax\n" \
+ " lea 1(%0,%1,2),%%edi\n" \
+ " movw %%ax,1(%0,%1,1)\n" \
+ " movd %%mm5,%%eax\n" /* eax = newpix21 high part */ \
+ " lea (%1,%1,2),%%esi\n" \
+ " movw %%ax,(%%edi)\n" \
+ " shrl $16,%%eax\n" \
+ " movw %%ax,1(%0,%%esi)\n" \
+ : \
+ : "r" (PixelPtr), "r" (LineLength), "r" (BoundingValuePtr-256) \
+ : "esi", "edi" , "memory", "eax" \
+ );
+
+ OC_LOOP_H_4x4
+ PixelPtr += LineLength*4;
+ OC_LOOP_H_4x4
+ __asm__ __volatile__("emms\n");
+}
+
+static void FilterVert__mmx(unsigned char * PixelPtr,
+ ogg_int32_t LineLength,
+ ogg_int16_t *BoundingValuePtr){
+ __asm__ __volatile__(
+ "pxor %%mm0,%%mm0\n" /* mm0 = 0 */
+ "movq (%0),%%mm7\n" /* mm7 = pix[0..7] */
+ "lea (%1,%1,2),%%esi\n" /* esi = ystride*3 */
+ "movq (%0,%%esi),%%mm4\n" /* mm4 = pix[0..7+ystride*3] */
+ "movq %%mm7,%%mm6\n" /* mm6 = pix[0..7] */
+ "punpcklbw %%mm0,%%mm6\n" /* expand unsigned pix[0..3] to 16 bits */
+ "movq %%mm4,%%mm5\n"
+ "punpckhbw %%mm0,%%mm7\n" /* expand unsigned pix[4..7] to 16 bits */
+ "punpcklbw %%mm0,%%mm4\n" /* expand other arrays too */
+ "punpckhbw %%mm0,%%mm5\n"
+ "psubw %%mm4,%%mm6\n" /* mm6 = mm6 - mm4 */
+ "psubw %%mm5,%%mm7\n" /* mm7 = mm7 - mm5 */
+ /* mm7:mm6 = _p[0]-_p[ystride*3] */
+ "movq (%0,%1),%%mm4\n" /* mm4 = pix[0..7+ystride] */
+ "movq %%mm4,%%mm5\n"
+ "movq (%0,%1,2),%%mm2\n" /* mm2 = pix[0..7+ystride*2] */
+ "movq %%mm2,%%mm3\n"
+ "movq %%mm2,%%mm1\n" //ystride*2
+ "punpckhbw %%mm0,%%mm5\n"
+ "punpcklbw %%mm0,%%mm4\n"
+ "punpckhbw %%mm0,%%mm3\n"
+ "punpcklbw %%mm0,%%mm2\n"
+ "psubw %%mm5,%%mm3\n"
+ "psubw %%mm4,%%mm2\n"
+ /* mm3:mm2 = (pix[ystride*2]-pix[ystride]); */
+ "PMULLW "MANGLE(V3)",%%mm3\n" /* *3 */
+ "PMULLW "MANGLE(V3)",%%mm2\n" /* *3 */
+ "paddw %%mm7,%%mm3\n" /* highpart */
+ "paddw %%mm6,%%mm2\n" /* lowpart of pix[0]-pix[ystride*3]+3*(pix[ystride*2]-pix[ystride]); */
+ "paddw "MANGLE(V804)",%%mm3\n" /* add 4 */ /* add 256 after shift */
+ "paddw "MANGLE(V804)",%%mm2\n" /* add 4 */ /* add 256 after shift */
+ "psraw $3,%%mm3\n" /* >>3 f coefs high */
+ "psraw $3,%%mm2\n" /* >>3 f coefs low */
+
+ " pextrw $0,%%mm2,%%esi\n" /* In MM3:MM2 we have f coefs (16bits) */
+ " pextrw $1,%%mm2,%%edi\n" /* now perform MM7:MM6 = *(_bv+ f) */
+ " pinsrw $0,(%2,%%esi,2),%%mm6\n"
+ " pinsrw $1,(%2,%%edi,2),%%mm6\n"
+
+ " pextrw $2,%%mm2,%%esi\n"
+ " pextrw $3,%%mm2,%%edi\n"
+ " pinsrw $2,(%2,%%esi,2),%%mm6\n"
+ " pinsrw $3,(%2,%%edi,2),%%mm6\n"
+
+ " pextrw $0,%%mm3,%%esi\n"
+ " pextrw $1,%%mm3,%%edi\n"
+ " pinsrw $0,(%2,%%esi,2),%%mm7\n"
+ " pinsrw $1,(%2,%%edi,2),%%mm7\n"
+
+ " pextrw $2,%%mm3,%%esi\n"
+ " pextrw $3,%%mm3,%%edi\n"
+ " pinsrw $2,(%2,%%esi,2),%%mm7\n"
+ " pinsrw $3,(%2,%%edi,2),%%mm7\n" //MM7 MM6 f=*(_bv+(f+4>>3));
+
+ "paddw %%mm6,%%mm4\n" /* (pix[ystride]+f); */
+ "paddw %%mm7,%%mm5\n" /* (pix[ystride]+f); */
+ "movq %%mm1,%%mm2\n"
+ "punpcklbw %%mm0,%%mm1\n"
+ "punpckhbw %%mm0,%%mm2\n" //[ystride*2]
+ "psubw %%mm6,%%mm1\n" /* (pix[ystride*2]-f); */
+ "psubw %%mm7,%%mm2\n" /* (pix[ystride*2]-f); */
+ "packuswb %%mm2,%%mm1\n"
+ "packuswb %%mm5,%%mm4\n"
+ "movq %%mm1,(%0,%1,2)\n" /* pix[ystride*2]= */
+ "movq %%mm4,(%0,%1)\n" /* pix[ystride]= */
+ "emms\n"
+ :
+ : "r" (PixelPtr-2*LineLength), "r" (LineLength), "r" (BoundingValuePtr-256)
+ : "esi", "edi" , "memory"
+ );
+}
+
+/* install our implementation in the function table */
+void dsp_mmx_dct_decode_init(DspFunctions *funcs)
+{
+ TH_DEBUG("enabling accelerated x86_32 mmx dct decode functions.\n");
+ funcs->FilterVert = FilterVert__mmx;
+ funcs->FilterHoriz = FilterHoriz__mmx;
+}
+
+#endif /* USE_ASM */
Copied: experimental/j/theora-mashup/lib/enc/x86_32/idct_mmx.c (from rev 12835, trunk/theora/lib/x86_32/idct_mmx.c)
Copied: experimental/j/theora-mashup/lib/enc/x86_64/dct_decode_mmx.c (from rev 12835, trunk/theora/lib/x86_64/dct_decode_mmx.c)
Copied: experimental/j/theora-mashup/lib/enc/x86_64/idct_mmx.c (from rev 12835, trunk/theora/lib/x86_64/idct_mmx.c)
More information about the commits mailing list