[xiph-commits] r17173 - in branches/theorarm-merge-branch: . lib lib/x86

robin at svn.xiph.org robin at svn.xiph.org
Sat Apr 24 07:22:55 PDT 2010


Author: robin
Date: 2010-04-24 07:22:55 -0700 (Sat, 24 Apr 2010)
New Revision: 17173

Modified:
   branches/theorarm-merge-branch/configure.ac
   branches/theorarm-merge-branch/lib/Makefile.am
   branches/theorarm-merge-branch/lib/analyze.c
   branches/theorarm-merge-branch/lib/bitpack.h
   branches/theorarm-merge-branch/lib/decode.c
   branches/theorarm-merge-branch/lib/encint.h
   branches/theorarm-merge-branch/lib/encode.c
   branches/theorarm-merge-branch/lib/huffdec.c
   branches/theorarm-merge-branch/lib/internal.h
   branches/theorarm-merge-branch/lib/mcenc.c
   branches/theorarm-merge-branch/lib/state.c
   branches/theorarm-merge-branch/lib/x86/mmxstate.c
   branches/theorarm-merge-branch/lib/x86/x86int.h
Log:
[Reapply of revision 17171 that went in without a commit message, and was
therefore subsequently backed out in 17172.]

Tweak theorarm-merge-branch so that it still builds on Linux using
./autogen.sh. This has meant pushing several of the oc_mv changes through
to the encoder side too - this needs to be discussed with the library
maintainers, as I fear they may not like it.

I can't see any other way to do it without losing speed on ARMs, but maybe
smarter people than me can spot something.

Next step is to put the bitreading changes in, protected by OC_LIBOGG2.



Modified: branches/theorarm-merge-branch/configure.ac
===================================================================
--- branches/theorarm-merge-branch/configure.ac	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/configure.ac	2010-04-24 14:22:55 UTC (rev 17173)
@@ -58,6 +58,7 @@
 AC_PROG_CPP
 CFLAGS="$cflags_save"
 
+AM_PROG_AS
 AM_PROG_CC_C_O
 AC_LIBTOOL_WIN32_DLL
 AM_PROG_LIBTOOL
@@ -191,6 +192,7 @@
 
 cpu_x86_64=no
 cpu_x86_32=no
+cpu_arm=no
 AC_ARG_ENABLE(asm,
     AS_HELP_STRING([--disable-asm], [Disable assembly optimizations]),
     [ ac_enable_asm=$enableval ], [ ac_enable_asm=yes] )
@@ -212,12 +214,17 @@
     AC_DEFINE([OC_X86_ASM], [],  [make use of x86 asm optimization])
     AC_DEFINE([OC_X86_64_ASM], [],  [make use of x86_64 asm optimization])
     ;;
+  arm)
+    cpu_arm=yes
+    cpu_optimization="arm"
+    AC_DEFINE([OC_ARM_ASM], [],  [make use of ARM asm optimization])
   esac
 else
   cpu_optimization="disabled"
 fi
 AM_CONDITIONAL([CPU_x86_64], [test x$cpu_x86_64 = xyes])
 AM_CONDITIONAL([CPU_x86_32], [test x$cpu_x86_32 = xyes])
+AM_CONDITIONAL([CPU_ARM], [test x$cpu_arm = xyes])
 
 # Test whenever ld supports -version-script
 AC_PROG_LD

Modified: branches/theorarm-merge-branch/lib/Makefile.am
===================================================================
--- branches/theorarm-merge-branch/lib/Makefile.am	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/Makefile.am	2010-04-24 14:22:55 UTC (rev 17173)
@@ -96,11 +96,13 @@
 	x86/mmxstate.c \
 	x86/x86state.c
 
+if CPU_ARM
 decoder_arm_sources = \
 	arm/ARMfrag.s \
 	arm/ARMidct.s \
 	arm/ARMpp.s \
 	arm/ARMstate.c
+endif
 
 if CPU_x86_64
 decoder_arch_sources = $(decoder_x86_sources)

Modified: branches/theorarm-merge-branch/lib/analyze.c
===================================================================
--- branches/theorarm-merge-branch/lib/analyze.c	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/analyze.c	2010-04-24 14:22:55 UTC (rev 17173)
@@ -545,7 +545,7 @@
 
 /*Temporary encoder state for the analysis pipeline.*/
 struct oc_enc_pipeline_state{
-  int                 bounding_values[256];
+  signed char         bounding_values[256];
   oc_fr_state         fr[3];
   oc_qii_state        qs[3];
   /*Condensed dequantization tables.*/
@@ -805,7 +805,7 @@
       const oc_mv *frag_mvs;
       frag_mvs=(const oc_mv *)_enc->state.frag_mvs;
       nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,_pli,
-       frag_mvs[_fragi][0],frag_mvs[_fragi][1]);
+       frag_mvs[_fragi].v[0],frag_mvs[_fragi].v[1]);
       if(nmv_offs>1){
         oc_enc_frag_copy2(_enc,dst,
          ref+mv_offs[0],ref+mv_offs[1],ystride);
@@ -2139,7 +2139,7 @@
  unsigned _mbi,int _mb_mode,const oc_fr_state *_fr,const oc_qii_state *_qs,
  const unsigned _skip_ssd[12],const unsigned _rd_scale[4]){
   static const oc_mv OC_MV_ZERO;
-  oc_cost_inter(_enc,_modec,_mbi,_mb_mode,OC_MV_ZERO,
+  oc_cost_inter(_enc,_modec,_mbi,_mb_mode,&OC_MV_ZERO.v[0],
    _fr,_qs,_skip_ssd,_rd_scale);
 }
 
@@ -2165,8 +2165,8 @@
  unsigned _mbi,oc_mv _mv[4],const oc_fr_state *_fr,const oc_qii_state *_qs,
  const unsigned _skip_ssd[12],const unsigned _rd_scale[5]){
   unsigned               frag_satd[12];
-  oc_mv                  lbmvs[4];
-  oc_mv                  cbmvs[4];
+  oc_mv4                 lbmvs;
+  oc_mv4                 cbmvs;
   const unsigned char   *src;
   const unsigned char   *ref;
   int                    ystride;
@@ -2198,12 +2198,12 @@
   _modec->rate=_modec->ssd=0;
   for(bi=0;bi<4;bi++){
     fragi=mb_map[0][bi];
-    dx=_mv[bi][0];
-    dy=_mv[bi][1];
+    dx=_mv[bi].v[0];
+    dy=_mv[bi].v[1];
     /*Save the block MVs as the current ones while we're here; we'll replace
        them if we don't ultimately choose 4MV mode.*/
-    frag_mvs[fragi][0]=(signed char)dx;
-    frag_mvs[fragi][1]=(signed char)dy;
+    frag_mvs[fragi].v[0]=(signed char)dx;
+    frag_mvs[fragi].v[1]=(signed char)dy;
     frag_offs=frag_buf_offs[fragi];
     if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){
       satd=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
@@ -2223,16 +2223,16 @@
   nqis=_enc->state.nqis;
   for(bi=0;bi<4;bi++){
     if(_modec->qii[OC_MB_PHASE[_mbi&3][bi]]>=nqis){
-      memset(lbmvs+bi,0,sizeof(*lbmvs));
+      memset(&lbmvs.v[bi],0,sizeof(lbmvs.v[0]));
     }
     else{
-      memcpy(lbmvs+bi,_mv+bi,sizeof(*lbmvs));
-      bits0+=OC_MV_BITS[0][_mv[bi][0]+31]+OC_MV_BITS[0][_mv[bi][1]+31];
+      memcpy(&lbmvs.v[bi],_mv+bi,sizeof(lbmvs.v[0]));
+      bits0+=OC_MV_BITS[0][_mv[bi].v[0]+31]+OC_MV_BITS[0][_mv[bi].v[1]+31];
       bits1+=12;
     }
   }
-  (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(cbmvs,
-   (const oc_mv *)lbmvs);
+  (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(&cbmvs,
+   (const oc_mv4 *)&lbmvs);
   map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
   map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
   /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
@@ -2242,8 +2242,8 @@
     pli=mapi>>2;
     bi=mapi&3;
     fragi=mb_map[pli][bi];
-    dx=cbmvs[bi][0];
-    dy=cbmvs[bi][1];
+    dx=cbmvs.v[bi].v[0];
+    dy=cbmvs.v[bi].v[1];
     frag_offs=frag_buf_offs[fragi];
     /*TODO: We could save half these calls by re-using the results for the Cb
        and Cr planes; is it worth it?*/
@@ -2314,7 +2314,7 @@
   luma_avg=OC_CLAMPI(90<<8,_enc->luma_avg,160<<8);
   mcu_rd_scale=_enc->mcu_rd_scale;
   mcu_rd_iscale=_enc->mcu_rd_iscale;
-  last_mv[0]=last_mv[1]=prior_mv[0]=prior_mv[1]=0;
+  last_mv.v[0]=last_mv.v[1]=prior_mv.v[0]=prior_mv.v[1]=0;
   /*Choose MVs and MB modes and quantize and code luma.
     Must be done in Hilbert order.*/
   map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
@@ -2405,18 +2405,18 @@
           oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
            pipe.fr+0,pipe.qs+0,intra_satd,skip_ssd,rd_scale);
           mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
-           OC_MODE_INTER_MV,embs[mbi].unref_mv[OC_FRAME_PREV],
+           OC_MODE_INTER_MV,embs[mbi].unref_mv[OC_FRAME_PREV].v,
            pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
           oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST,mbi,
-           OC_MODE_INTER_MV_LAST,last_mv,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
+           OC_MODE_INTER_MV_LAST,last_mv.v,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
           oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST2,mbi,
-           OC_MODE_INTER_MV_LAST2,prior_mv,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
+           OC_MODE_INTER_MV_LAST2,prior_mv.v,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
           oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
            embs[mbi].block_mv,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
           oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
            OC_MODE_GOLDEN_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
           mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
-           OC_MODE_GOLDEN_MV,embs[mbi].unref_mv[OC_FRAME_GOLD],
+           OC_MODE_GOLDEN_MV,embs[mbi].unref_mv[OC_FRAME_GOLD].v,
            pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
           /*The explicit MV modes (2,6,7) have not yet gone through halfpel
              refinement.
@@ -2441,7 +2441,7 @@
               embs[mbi].refined|=0x40;
             }
             mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
-             OC_MODE_GOLDEN_MV,embs[mbi].analysis_mv[0][OC_FRAME_GOLD],
+             OC_MODE_GOLDEN_MV,embs[mbi].analysis_mv[0][OC_FRAME_GOLD].v,
              pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
           }
           if(!(embs[mbi].refined&0x04)){
@@ -2449,7 +2449,7 @@
             embs[mbi].refined|=0x04;
           }
           mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
-           OC_MODE_INTER_MV,embs[mbi].analysis_mv[0][OC_FRAME_PREV],
+           OC_MODE_INTER_MV,embs[mbi].analysis_mv[0][OC_FRAME_PREV].v,
            pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
           /*Finally, pick the mode with the cheapest estimated R-D cost.*/
           mb_mode=OC_MODE_INTER_NOMV;
@@ -2496,26 +2496,26 @@
         if(mb_mode!=OC_MODE_INTER_MV_FOUR){
           switch(mb_mode){
             case OC_MODE_INTER_MV:{
-              dx=embs[mbi].analysis_mv[0][OC_FRAME_PREV][0];
-              dy=embs[mbi].analysis_mv[0][OC_FRAME_PREV][1];
+              dx=embs[mbi].analysis_mv[0][OC_FRAME_PREV].v[0];
+              dy=embs[mbi].analysis_mv[0][OC_FRAME_PREV].v[1];
             }break;
             case OC_MODE_INTER_MV_LAST:{
-              dx=last_mv[0];
-              dy=last_mv[1];
+              dx=last_mv.v[0];
+              dy=last_mv.v[1];
             }break;
             case OC_MODE_INTER_MV_LAST2:{
-              dx=prior_mv[0];
-              dy=prior_mv[1];
+              dx=prior_mv.v[0];
+              dy=prior_mv.v[1];
             }break;
             case OC_MODE_GOLDEN_MV:{
-              dx=embs[mbi].analysis_mv[0][OC_FRAME_GOLD][0];
-              dy=embs[mbi].analysis_mv[0][OC_FRAME_GOLD][1];
+              dx=embs[mbi].analysis_mv[0][OC_FRAME_GOLD].v[0];
+              dy=embs[mbi].analysis_mv[0][OC_FRAME_GOLD].v[1];
             }break;
           }
           for(bi=0;bi<4;bi++){
             fragi=mb_maps[mbi][0][bi];
-            frag_mvs[fragi][0]=(signed char)dx;
-            frag_mvs[fragi][1]=(signed char)dy;
+            frag_mvs[fragi].v[0]=(signed char)dx;
+            frag_mvs[fragi].v[1]=(signed char)dy;
           }
         }
         for(bi=0;bi<4;bi++){
@@ -2529,16 +2529,16 @@
           mb_mode=mb_modes[mbi];
           switch(mb_mode){
             case OC_MODE_INTER_MV:{
-              memcpy(prior_mv,last_mv,sizeof(prior_mv));
+              memcpy(prior_mv.v,last_mv.v,sizeof(prior_mv));
               /*If we're backing out from 4MV, find the MV we're actually
                  using.*/
               if(orig_mb_mode==OC_MODE_INTER_MV_FOUR){
                 for(bi=0;;bi++){
                   fragi=mb_maps[mbi][0][bi];
                   if(frags[fragi].coded){
-                    memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
-                    dx=frag_mvs[fragi][0];
-                    dy=frag_mvs[fragi][1];
+                    memcpy(last_mv.v,frag_mvs[fragi].v,sizeof(last_mv));
+                    dx=frag_mvs[fragi].v[0];
+                    dy=frag_mvs[fragi].v[1];
                     break;
                   }
                 }
@@ -2546,39 +2546,39 @@
               }
               /*Otherwise we used the original analysis MV.*/
               else{
-                memcpy(last_mv,
-                 embs[mbi].analysis_mv[0][OC_FRAME_PREV],sizeof(last_mv));
+                memcpy(last_mv.v,
+                 embs[mbi].analysis_mv[0][OC_FRAME_PREV].v,sizeof(last_mv));
               }
               _enc->mv_bits[0]+=mb_mv_bits_0;
               _enc->mv_bits[1]+=12;
             }break;
             case OC_MODE_INTER_MV_LAST2:{
               oc_mv tmp_mv;
-              memcpy(tmp_mv,prior_mv,sizeof(tmp_mv));
-              memcpy(prior_mv,last_mv,sizeof(prior_mv));
-              memcpy(last_mv,tmp_mv,sizeof(last_mv));
+              memcpy(tmp_mv.v,prior_mv.v,sizeof(tmp_mv));
+              memcpy(prior_mv.v,last_mv.v,sizeof(prior_mv));
+              memcpy(last_mv.v,tmp_mv.v,sizeof(last_mv));
             }break;
             case OC_MODE_GOLDEN_MV:{
               _enc->mv_bits[0]+=mb_gmv_bits_0;
               _enc->mv_bits[1]+=12;
             }break;
             case OC_MODE_INTER_MV_FOUR:{
-              oc_mv lbmvs[4];
-              oc_mv cbmvs[4];
-              memcpy(prior_mv,last_mv,sizeof(prior_mv));
+              oc_mv4 lbmvs;
+              oc_mv4 cbmvs;
+              memcpy(prior_mv.v,last_mv.v,sizeof(prior_mv));
               for(bi=0;bi<4;bi++){
                 fragi=mb_maps[mbi][0][bi];
                 if(frags[fragi].coded){
-                  memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
-                  memcpy(lbmvs[bi],frag_mvs[fragi],sizeof(lbmvs[bi]));
-                  _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi][0]+31]
-                   +OC_MV_BITS[0][frag_mvs[fragi][1]+31];
+                  memcpy(last_mv.v,frag_mvs[fragi].v,sizeof(last_mv));
+                  memcpy(lbmvs.v[bi].v,frag_mvs[fragi].v,sizeof(lbmvs.v[bi]));
+                  _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi].v[0]+31]
+                   +OC_MV_BITS[0][frag_mvs[fragi].v[1]+31];
                   _enc->mv_bits[1]+=12;
                 }
                 /*Replace the block MVs for not-coded blocks with (0,0).*/
-                else memset(lbmvs[bi],0,sizeof(lbmvs[bi]));
+                else memset(lbmvs.v[bi].v,0,sizeof(lbmvs.v[bi]));
               }
-              (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
+              (*set_chroma_mvs)(&cbmvs,&lbmvs);
               for(mapii=4;mapii<nmap_idxs;mapii++){
                 mapi=map_idxs[mapii];
                 pli=mapi>>2;
@@ -2586,7 +2586,7 @@
                 fragi=mb_maps[mbi][pli][bi];
                 frags[fragi].mb_mode=mb_mode;
                 frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii];
-                memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(frag_mvs[fragi]));
+                memcpy(frag_mvs[fragi].v,&cbmvs.v[bi],sizeof(frag_mvs[fragi]));
               }
             }break;
           }
@@ -2613,8 +2613,8 @@
                values won't have been chosen with the right MV, but it's
                probaby not worth re-estimating them.*/
             frags[fragi].qii=modes[mb_mode].qii[mapii];
-            frag_mvs[fragi][0]=(signed char)dx;
-            frag_mvs[fragi][1]=(signed char)dy;
+            frag_mvs[fragi].v[0]=(signed char)dx;
+            frag_mvs[fragi].v[1]=(signed char)dy;
           }
         }
         /*Save masking scale factors for chroma blocks.*/

Modified: branches/theorarm-merge-branch/lib/bitpack.h
===================================================================
--- branches/theorarm-merge-branch/lib/bitpack.h	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/bitpack.h	2010-04-24 14:22:55 UTC (rev 17173)
@@ -51,10 +51,10 @@
 /* returns -1 for read beyond EOF, or the number of whole bytes available */
 long oc_pack_bytes_left(oc_pack_buf *_b);
 
-/*These two functions are implemented locally in huffdec.c*/
+/*These two functions are implemented in huffdec.c*/
 /*Read in bits without advancing the bitptr.
   Here we assume 0<=_bits&&_bits<=32.*/
-/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/
-/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/
+long oc_pack_look(oc_pack_buf *_b,int _bits);
+void oc_pack_adv(oc_pack_buf *_b,int _bits);
 
 #endif

Modified: branches/theorarm-merge-branch/lib/decode.c
===================================================================
--- branches/theorarm-merge-branch/lib/decode.c	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/decode.c	2010-04-24 14:22:55 UTC (rev 17173)
@@ -568,7 +568,6 @@
 static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
   oc_sb_flags *sb_flags;
   unsigned     nsbs;
-  unsigned     sbi;
   unsigned     npartial;
   unsigned     run_count;
   int          flag;
@@ -781,7 +780,6 @@
   unsigned char        scheme0_alphabet[8];
   oc_mode_unpack_func  mode_unpack;
   size_t               nmbs;
-  size_t               mbi;
   int                  mode_scheme;
   mode_scheme=(int)oc_pack_read(&_dec->opb,3);
   if(mode_scheme==0){
@@ -977,7 +975,6 @@
 static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
   oc_fragment     *frags;
   const ptrdiff_t *coded_fragis;
-  const ptrdiff_t *coded_fragis_end;
   ptrdiff_t        ncoded_fragis;
   ptrdiff_t        fragi;
   ncoded_fragis=_dec->state.ntotal_coded_fragis;
@@ -1041,7 +1038,7 @@
           flag=!flag;
         run_count=oc_sb_run_unpack(&_dec->opb);
         full_run=run_count>=4129;
-        for(;coded_fragis<coded_fragis_end;*coded_fragis++){
+        for(;coded_fragis<coded_fragis_end;coded_fragis++){
           fragi=*coded_fragis;
           if(frags[fragi].qii==0)continue;
           if(run_count--<=0)break;
@@ -1165,7 +1162,6 @@
     ptrdiff_t run_counts[64];
     ptrdiff_t eob_count;
     size_t    ntoks_left;
-    size_t    ntoks;
     int       rli;
     _dec->eob_runs[pli][_zzi]=_eobs;
     _dec->ti0[pli][_zzi]=ti;
@@ -2201,7 +2197,11 @@
     int                   pli;
     int                   notstart;
     int                   notdone;
+#ifdef OC_LIBOGG2
     oc_pack_readinit(&_dec->opb,_op->packet);
+#else
+    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
+#endif
 #if defined(HAVE_CAIRO)
     _dec->telemetry_frame_bytes=_op->bytes;
 #endif

Modified: branches/theorarm-merge-branch/lib/encint.h
===================================================================
--- branches/theorarm-merge-branch/lib/encint.h	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/encint.h	2010-04-24 14:22:55 UTC (rev 17173)
@@ -26,8 +26,6 @@
 
 
 
-typedef oc_mv                         oc_mv2[2];
-
 typedef struct oc_enc_opt_vtable      oc_enc_opt_vtable;
 typedef struct oc_mb_enc_info         oc_mb_enc_info;
 typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
@@ -105,6 +103,7 @@
 void oc_enc_vtable_init(oc_enc_ctx *_enc);
 
 
+typedef oc_mv oc_mv2_[2];
 
 /*Encoder-specific macroblock information.*/
 struct oc_mb_enc_info{
@@ -124,7 +123,7 @@
      can be used to estimate constant velocity and constant acceleration
      predictors.
     Uninitialized MVs are (0,0).*/
-  oc_mv2        analysis_mv[3];
+  oc_mv2_       analysis_mv[3];
   /*Current unrefined analysis MVs.*/
   oc_mv         unref_mv[2];
   /*Unrefined block MVs.*/

Modified: branches/theorarm-merge-branch/lib/encode.c
===================================================================
--- branches/theorarm-merge-branch/lib/encode.c	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/encode.c	2010-04-24 14:22:55 UTC (rev 17173)
@@ -663,7 +663,7 @@
           fragi=mb_maps[mbi][0][bi];
           if(frags[fragi].coded){
             oc_enc_mv_pack(_enc,mv_scheme,
-             frag_mvs[fragi][0],frag_mvs[fragi][1]);
+             frag_mvs[fragi].v[0],frag_mvs[fragi].v[1]);
             /*Only code a single MV for this macro block.*/
             break;
           }
@@ -674,7 +674,7 @@
           fragi=mb_maps[mbi][0][bi];
           if(frags[fragi].coded){
             oc_enc_mv_pack(_enc,mv_scheme,
-             frag_mvs[fragi][0],frag_mvs[fragi][1]);
+             frag_mvs[fragi].v[0],frag_mvs[fragi].v[1]);
             /*Keep coding all the MVs for this macro block.*/
           }
         }

Modified: branches/theorarm-merge-branch/lib/huffdec.c
===================================================================
--- branches/theorarm-merge-branch/lib/huffdec.c	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/huffdec.c	2010-04-24 14:22:55 UTC (rev 17173)
@@ -130,7 +130,7 @@
 
 /*Read in bits without advancing the bit pointer.
   Here we assume 0<=_bits&&_bits<=32.*/
-static long oc_pack_look(oc_pack_buf *_b,int _bits){
+long oc_pack_look(oc_pack_buf *_b,int _bits){
   oc_pb_window window;
   int          available;
   long         result;
@@ -143,7 +143,7 @@
 }
 
 /*Advance the bit pointer.*/
-static void oc_pack_adv(oc_pack_buf *_b,int _bits){
+void oc_pack_adv(oc_pack_buf *_b,int _bits){
   /*We ignore the special cases for _bits==0 and _bits==32 here, since they are
      never used actually used.
     OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read

Modified: branches/theorarm-merge-branch/lib/internal.h
===================================================================
--- branches/theorarm-merge-branch/lib/internal.h	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/internal.h	2010-04-24 14:22:55 UTC (rev 17173)
@@ -341,7 +341,7 @@
    const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
    int _dst_frame,int _src_frame,int _pli);
   void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,
-   int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);  
+   signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end);  
   void (*restore_fpu)(void);
 };
 
@@ -519,7 +519,7 @@
 void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,
  signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
 void oc_restore_fpu(const oc_theora_state *_state);
-void oc_idct8x8(ogg_int16_t _y[64],int _last_zzi);
+void oc_idct8x8_arm(ogg_int16_t _y[64],int _last_zzi);
 
 /*Default pure-C implementations.*/
 void oc_frag_copy_c(unsigned char *_dst,
@@ -537,7 +537,7 @@
  const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
  int _dst_frame,int _src_frame,int _pli);
 void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
- int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+ signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end);
 void oc_restore_fpu_c(void);
 
 /*We need a way to call a few encoder functions without introducing a link-time

Modified: branches/theorarm-merge-branch/lib/mcenc.c
===================================================================
--- branches/theorarm-merge-branch/lib/mcenc.c	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/mcenc.c	2010-04-24 14:22:55 UTC (rev 17173)
@@ -102,8 +102,8 @@
     /*Fill in the first part of set A: the vectors from adjacent blocks.*/
     for(i=0;i<embs[_mbi].ncneighbors;i++){
       nmbi=embs[_mbi].cneighbors[i];
-      _mcenc->candidates[ncandidates][0]=embs[nmbi].analysis_mv[0][_frame][0];
-      _mcenc->candidates[ncandidates][1]=embs[nmbi].analysis_mv[0][_frame][1];
+      _mcenc->candidates[ncandidates][0]=embs[nmbi].analysis_mv[0][_frame].v[0];
+      _mcenc->candidates[ncandidates][1]=embs[nmbi].analysis_mv[0][_frame].v[1];
       ncandidates++;
     }
   }
@@ -113,9 +113,9 @@
   _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,_accum[1],31);
   ncandidates++;
   _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
-   embs[_mbi].analysis_mv[1][_frame][0]+_accum[0],31);
+   embs[_mbi].analysis_mv[1][_frame].v[0]+_accum[0],31);
   _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
-   embs[_mbi].analysis_mv[1][_frame][1]+_accum[1],31);
+   embs[_mbi].analysis_mv[1][_frame].v[1]+_accum[1],31);
   ncandidates++;
   _mcenc->candidates[ncandidates][0]=0;
   _mcenc->candidates[ncandidates][1]=0;
@@ -137,11 +137,11 @@
   nmbi=_mbi;
   for(i=0;;i++){
     _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
-     2*embs[_mbi].analysis_mv[1][_frame][0]
-     -embs[_mbi].analysis_mv[2][_frame][0]+_accum[0],31);
+     2*embs[_mbi].analysis_mv[1][_frame].v[0]
+     -embs[_mbi].analysis_mv[2][_frame].v[0]+_accum[0],31);
     _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
-     2*embs[_mbi].analysis_mv[1][_frame][1]
-     -embs[_mbi].analysis_mv[2][_frame][1]+_accum[1],31);
+     2*embs[_mbi].analysis_mv[1][_frame].v[1]
+     -embs[_mbi].analysis_mv[2][_frame].v[1]+_accum[1],31);
     ncandidates++;
     if(i>=embs[_mbi].npneighbors)break;
     nmbi=embs[_mbi].pneighbors[i];
@@ -483,57 +483,57 @@
   candy=best_vec[1];
   embs[_mbi].satd[_frame]=oc_mcenc_ysatd_check_mbcandidate_fullpel(_enc,
    frag_buf_offs,fragis,candx,candy,src,ref,ystride);
-  embs[_mbi].analysis_mv[0][_frame][0]=(signed char)(candx<<1);
-  embs[_mbi].analysis_mv[0][_frame][1]=(signed char)(candy<<1);
+  embs[_mbi].analysis_mv[0][_frame].v[0]=(signed char)(candx<<1);
+  embs[_mbi].analysis_mv[0][_frame].v[1]=(signed char)(candy<<1);
   if(_frame==OC_FRAME_PREV){
     for(bi=0;bi<4;bi++){
       candx=best_block_vec[bi][0];
       candy=best_block_vec[bi][1];
       embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_check_bcandidate_fullpel(_enc,
        frag_buf_offs[fragis[bi]],candx,candy,src,ref,ystride);
-      embs[_mbi].block_mv[bi][0]=(signed char)(candx<<1);
-      embs[_mbi].block_mv[bi][1]=(signed char)(candy<<1);
+      embs[_mbi].block_mv[bi].v[0]=(signed char)(candx<<1);
+      embs[_mbi].block_mv[bi].v[1]=(signed char)(candy<<1);
     }
   }
 }
 
 void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi){
-  oc_mv2         *mvs;
+  oc_mv2_        *mvs;
   int             accum_p[2];
   int             accum_g[2];
-  mvs=_enc->mb_info[_mbi].analysis_mv;
+  mvs=&_enc->mb_info[_mbi].analysis_mv[0];
   if(_enc->prevframe_dropped){
-    accum_p[0]=mvs[0][OC_FRAME_PREV][0];
-    accum_p[1]=mvs[0][OC_FRAME_PREV][1];
+    accum_p[0]=mvs[0][OC_FRAME_PREV].v[0];
+    accum_p[1]=mvs[0][OC_FRAME_PREV].v[1];
   }
   else accum_p[1]=accum_p[0]=0;
-  accum_g[0]=mvs[2][OC_FRAME_GOLD][0];
-  accum_g[1]=mvs[2][OC_FRAME_GOLD][1];
-  mvs[0][OC_FRAME_PREV][0]-=mvs[2][OC_FRAME_PREV][0];
-  mvs[0][OC_FRAME_PREV][1]-=mvs[2][OC_FRAME_PREV][1];
+  accum_g[0]=mvs[2][OC_FRAME_GOLD].v[0];
+  accum_g[1]=mvs[2][OC_FRAME_GOLD].v[1];
+  mvs[0][OC_FRAME_PREV].v[0]-=mvs[2][OC_FRAME_PREV].v[0];
+  mvs[0][OC_FRAME_PREV].v[1]-=mvs[2][OC_FRAME_PREV].v[1];
   /*Move the motion vector predictors back a frame.*/
   memmove(mvs+1,mvs,2*sizeof(*mvs));
   /*Search the last frame.*/
   oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV);
-  mvs[2][OC_FRAME_PREV][0]=accum_p[0];
-  mvs[2][OC_FRAME_PREV][1]=accum_p[1];
+  mvs[2][OC_FRAME_PREV].v[0]=accum_p[0];
+  mvs[2][OC_FRAME_PREV].v[1]=accum_p[1];
   /*GOLDEN MVs are different from PREV MVs in that they're each absolute
      offsets from some frame in the past rather than relative offsets from the
      frame before.
     For predictor calculation to make sense, we need them to be in the same
      form as PREV MVs.*/
-  mvs[1][OC_FRAME_GOLD][0]-=mvs[2][OC_FRAME_GOLD][0];
-  mvs[1][OC_FRAME_GOLD][1]-=mvs[2][OC_FRAME_GOLD][1];
-  mvs[2][OC_FRAME_GOLD][0]-=accum_g[0];
-  mvs[2][OC_FRAME_GOLD][1]-=accum_g[1];
+  mvs[1][OC_FRAME_GOLD].v[0]-=mvs[2][OC_FRAME_GOLD].v[0];
+  mvs[1][OC_FRAME_GOLD].v[1]-=mvs[2][OC_FRAME_GOLD].v[1];
+  mvs[2][OC_FRAME_GOLD].v[0]-=accum_g[0];
+  mvs[2][OC_FRAME_GOLD].v[1]-=accum_g[1];
   /*Search the golden frame.*/
   oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD);
   /*Put GOLDEN MVs back into absolute offset form.
     The newest MV is already an absolute offset.*/
-  mvs[2][OC_FRAME_GOLD][0]+=accum_g[0];
-  mvs[2][OC_FRAME_GOLD][1]+=accum_g[1];
-  mvs[1][OC_FRAME_GOLD][0]+=mvs[2][OC_FRAME_GOLD][0];
-  mvs[1][OC_FRAME_GOLD][1]+=mvs[2][OC_FRAME_GOLD][1];
+  mvs[2][OC_FRAME_GOLD].v[0]+=accum_g[0];
+  mvs[2][OC_FRAME_GOLD].v[1]+=accum_g[1];
+  mvs[1][OC_FRAME_GOLD].v[0]+=mvs[2][OC_FRAME_GOLD].v[0];
+  mvs[1][OC_FRAME_GOLD].v[1]+=mvs[2][OC_FRAME_GOLD].v[1];
 }
 
 #if 0
@@ -650,12 +650,12 @@
   oc_mb_enc_info *embs;
   int             vec[2];
   embs=_enc->mb_info;
-  vec[0]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][0]);
-  vec[1]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][1]);
+  vec[0]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame].v[0]);
+  vec[1]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame].v[1]);
   embs[_mbi].satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc,
    _mbi,vec,embs[_mbi].satd[_frame],_frame);
-  embs[_mbi].analysis_mv[0][_frame][0]=(signed char)vec[0];
-  embs[_mbi].analysis_mv[0][_frame][1]=(signed char)vec[1];
+  embs[_mbi].analysis_mv[0][_frame].v[0]=(signed char)vec[0];
+  embs[_mbi].analysis_mv[0][_frame].v[1]=(signed char)vec[1];
 }
 
 #if 0
@@ -766,11 +766,11 @@
     ptrdiff_t frag_offs;
     int       vec[2];
     frag_offs=frag_buf_offs[fragis[bi]];
-    vec[0]=OC_DIV2(embs[_mbi].block_mv[bi][0]);
-    vec[1]=OC_DIV2(embs[_mbi].block_mv[bi][1]);
+    vec[0]=OC_DIV2(embs[_mbi].block_mv[bi].v[0]);
+    vec[1]=OC_DIV2(embs[_mbi].block_mv[bi].v[1]);
     embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_halfpel_brefine(_enc,vec,
      src+frag_offs,ref+frag_offs,ystride,offset_y,embs[_mbi].block_satd[bi]);
-    embs[_mbi].ref_mv[bi][0]=(signed char)vec[0];
-    embs[_mbi].ref_mv[bi][1]=(signed char)vec[1];
+    embs[_mbi].ref_mv[bi].v[0]=(signed char)vec[0];
+    embs[_mbi].ref_mv[bi].v[1]=(signed char)vec[1];
   }
 }

Modified: branches/theorarm-merge-branch/lib/state.c
===================================================================
--- branches/theorarm-merge-branch/lib/state.c	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/state.c	2010-04-24 14:22:55 UTC (rev 17173)
@@ -910,7 +910,7 @@
      _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
      +frag_buf_off;
     if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
-     _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
+     _state->frag_mvs[_fragi].v[0],_state->frag_mvs[_fragi].v[1])>1){
       oc_frag_recon_inter2(_state,
        dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs);
     }
@@ -952,7 +952,7 @@
   }
 }
 
-static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){
+static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
   int y;
   _pix-=2;
   for(y=0;y<8;y++){
@@ -968,7 +968,7 @@
   }
 }
 
-static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){
+static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
   int x;
   _pix-=_ystride*2;
   for(x=0;x<8;x++){
@@ -986,7 +986,7 @@
 /*Initialize the bounding values array used by the loop filter.
   _bv: Storage for the array.
   Return: 0 on success, or a non-zero value if no filtering need be applied.*/
-int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){
+int oc_state_loop_filter_init(oc_theora_state *_state,signed char _bv[256]){
   int flimit;
   int i;
   flimit=_state->loop_filter_limits[_state->qis[0]];
@@ -1009,13 +1009,13 @@
   _pli:       The color plane to filter.
   _fragy0:    The Y coordinate of the first fragment row to filter.
   _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256],
+void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,signed char *_bv,
  int _refi,int _pli,int _fragy0,int _fragy_end){
   _state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli,
    _fragy0,_fragy_end);
 }
 
-void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv,
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,signed char *_bv,
  int _refi,int _pli,int _fragy0,int _fragy_end){
   const oc_fragment_plane *fplane;
   const oc_fragment       *frags;

Modified: branches/theorarm-merge-branch/lib/x86/mmxstate.c
===================================================================
--- branches/theorarm-merge-branch/lib/x86/mmxstate.c	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/x86/mmxstate.c	2010-04-24 14:22:55 UTC (rev 17173)
@@ -86,7 +86,7 @@
      _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
      +frag_buf_off;
     if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
-     _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
+     _state->frag_mvs[_fragi].v[0],_state->frag_mvs[_fragi].v[1])>1){
       oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
        _dct_coeffs);
     }
@@ -133,7 +133,7 @@
   _fragy0:    The Y coordinate of the first fragment row to filter.
   _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
 void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
- int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
+ signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){
   OC_ALIGN8(unsigned char   ll[8]);
   const oc_fragment_plane *fplane;
   const oc_fragment       *frags;

Modified: branches/theorarm-merge-branch/lib/x86/x86int.h
===================================================================
--- branches/theorarm-merge-branch/lib/x86/x86int.h	2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/x86/x86int.h	2010-04-24 14:22:55 UTC (rev 17173)
@@ -36,7 +36,7 @@
  const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
  int _dst_frame,int _src_frame,int _pli);
 void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
- int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+ signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end);
 void oc_restore_fpu_mmx(void);
 
 #endif



More information about the commits mailing list