[xiph-commits] r15825 - branches/theora-thusnelda/lib/enc

Sat Mar 21 14:11:55 PDT 2009

Author: tterribe
Date: 2009-03-21 14:11:55 -0700 (Sat, 21 Mar 2009)
New Revision: 15825

Added:
   branches/theora-thusnelda/lib/enc/mathops.c
   branches/theora-thusnelda/lib/enc/mathops.h
Log:
Forgot to add two files needed for the fixed-point rate control.


Added: branches/theora-thusnelda/lib/enc/mathops.c
===================================================================

--- branches/theora-thusnelda/lib/enc/mathops.c	                        (rev 0)
+++ branches/theora-thusnelda/lib/enc/mathops.c	2009-03-21 21:11:55 UTC (rev 15825)
@@ -0,0 +1,296 @@
+#include "mathops.h"
+#include <limits.h>
+
+/*The fastest fallback strategy for platforms with fast multiplication appears
+   to be based on de Bruijn sequences~\cite{LP98}.
+  Tests confirmed this to be true even on an ARM11, where it is actually faster
+   than using the native clz instruction.
+  Define OC_ILOG_NODEBRUIJN to use a simpler fallback on platforms where
+   multiplication or table lookups are too expensive.
+
+  @UNPUBLISHED{LP98,
+    author="Charles E. Leiserson and Harald Prokop",
+    title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word",
+    month=Jun,
+    year=1998,
+    note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}"
+  }*/
+#if !defined(OC_ILOG_NODEBRUIJN)&& \
+ !defined(OC_CLZ32)||!defined(OC_CLZ64)&&LONG_MAX<9223372036854775807LL
+static const unsigned char OC_DEBRUIJN_IDX32[32]={
+   0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8,
+  31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9
+};
+#endif
+
+int oc_ilog32(ogg_uint32_t _v){
+#if defined(OC_CLZ32)
+  return (OC_CLZ32_OFFS-OC_CLZ32(_v))&-!!_v;
+#else
+/*On a Pentium M, this branchless version tested as the fastest version without
+   multiplications on 1,000,000,000 random 32-bit integers, edging out a
+   similar version with branches, and a 256-entry LUT version.*/
+# if defined(OC_ILOG_NODEBRUIJN)
+  int ret;
+  int m;
+  ret=_v>0;
+  m=(_v>0xFFFFU)<<4;
+  _v>>=m;
+  ret|=m;
+  m=(_v>0xFFU)<<3;
+  _v>>=m;
+  ret|=m;
+  m=(_v>0xFU)<<2;
+  _v>>=m;
+  ret|=m;
+  m=(_v>3)<<1;
+  _v>>=m;
+  ret|=m;
+  ret+=_v>1;
+  return ret;
+/*This de Bruijn sequence version is faster if you have a fast multiplier.*/
+# else
+  int ret;
+  ret=_v>0;
+  _v|=_v>>1;
+  _v|=_v>>2;
+  _v|=_v>>4;
+  _v|=_v>>8;
+  _v|=_v>>16;
+  _v=(_v>>1)+1;
+  ret+=OC_DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F];
+  return ret;
+# endif
+#endif
+}
+
+int oc_ilog64(ogg_int64_t _v){
+#if defined(OC_CLZ64)
+  return (OC_CLZ64_OFFS-OC_CLZ64(_v))&-!!_v;
+#else
+# if defined(OC_ILOG_NODEBRUIJN)
+  ogg_uint32_t v;
+  int          ret;
+  int          m;
+  ret=_v>0;
+  m=(_v>0xFFFFFFFFU)<<5;
+  v=(ogg_uint32_t)(_v>>m);
+  ret|=m;
+  m=(v>0xFFFFU)<<4;
+  v>>=m;
+  ret|=m;
+  m=(v>0xFFU)<<3;
+  v>>=m;
+  ret|=m;
+  m=(v>0xFU)<<2;
+  v>>=m;
+  ret|=m;
+  m=(v>3)<<1;
+  v>>=m;
+  ret|=m;
+  ret+=v>1;
+  return ret;
+# else
+/*If we don't have a 64-bit word, split it into two 32-bit halves.*/
+#  if LONG_MAX<9223372036854775807LL
+  ogg_uint32_t v;
+  int          ret;
+  int          m;
+  ret=_v>0;
+  m=(_v>0xFFFFFFFFU)<<5;
+  v=(ogg_uint32_t)(_v>>m);
+  ret|=m;
+  v|=v>>1;
+  v|=v>>2;
+  v|=v>>4;
+  v|=v>>8;
+  v|=v>>16;
+  v=(v>>1)+1;
+  ret+=OC_DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F];
+  return ret;
+/*Otherwise do it in one 64-bit operation.*/
+#  else
+  static const unsigned char OC_DEBRUIJN_IDX64[64]={
+     0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
+     5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
+    63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
+    62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
+  };
+  int ret;
+  ret=_v>0;
+  _v|=_v>>1;
+  _v|=_v>>2;
+  _v|=_v>>4;
+  _v|=_v>>8;
+  _v|=_v>>16;
+  _v|=_v>>32;
+  _v=(_v>>1)+1;
+  ret+=OC_DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F];
+  return ret;
+#  endif
+# endif
+#endif
+}
+
+/*round(2**(62+i)*atanh(2**(-(i+1)))/log(2))*/
+static const ogg_int64_t OC_ATANH_LOG2[32]={
+  0x32B803473F7AD0F4LL,0x2F2A71BD4E25E916LL,0x2E68B244BB93BA06LL,
+  0x2E39FB9198CE62E4LL,0x2E2E683F68565C8FLL,0x2E2B850BE2077FC1LL,
+  0x2E2ACC58FE7B78DBLL,0x2E2A9E2DE52FD5F2LL,0x2E2A92A338D53EECLL,
+  0x2E2A8FC08F5E19B6LL,0x2E2A8F07E51A485ELL,0x2E2A8ED9BA8AF388LL,
+  0x2E2A8ECE2FE7384ALL,0x2E2A8ECB4D3E4B1ALL,0x2E2A8ECA94940FE8LL,
+  0x2E2A8ECA6669811DLL,0x2E2A8ECA5ADEDD6ALL,0x2E2A8ECA57FC347ELL,
+  0x2E2A8ECA57438A43LL,0x2E2A8ECA57155FB4LL,0x2E2A8ECA5709D510LL,
+  0x2E2A8ECA5706F267LL,0x2E2A8ECA570639BDLL,0x2E2A8ECA57060B92LL,
+  0x2E2A8ECA57060008LL,0x2E2A8ECA5705FD25LL,0x2E2A8ECA5705FC6CLL,
+  0x2E2A8ECA5705FC3ELL,0x2E2A8ECA5705FC33LL,0x2E2A8ECA5705FC30LL,
+  0x2E2A8ECA5705FC2FLL,0x2E2A8ECA5705FC2FLL
+};
+
+/*Computes the binary exponential of _log2, a log base 2 in Q57 format.*/
+ogg_int64_t oc_bexp64(ogg_int64_t _z){
+  ogg_int64_t w;
+  ogg_int64_t z;
+  int         ipart;
+  ipart=(int)(_z>>57);
+  if(ipart<0)return 0;
+  if(ipart>=63)return 0x7FFFFFFFFFFFFFFFLL;
+  z=_z-((ogg_int64_t)ipart<<57);
+  if(z){
+    ogg_int64_t mask;
+    long        wlo;
+    int         i;
+    /*C doesn't give us 64x64->128 muls, so we use CORDIC.
+      This is not particularly fast, but it's not being used in time-critical
+       code; it is very accurate.*/
+    /*z is the fractional part of the log in Q62 format.
+      We need 1 bit of headroom since the magnitude can get larger than 1
+       during the iteration, and a sign bit.*/
+    z<<=5;
+    /*w is the exponential in Q61 format (since it also needs headroom and can
+       get as large as 2.0); we could get another bit if we dropped the sign,
+       but we'll recover that bit later anyway.
+      Ideally this should start out as
+        \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}}
+       but in order to guarantee convergence we have to repeat iterations 4,
+        13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/
+    w=0x26A3D0E401DD846DLL;
+    for(i=0;;i++){
+      mask=-(z<0);
+      w+=(w>>i+1)+mask^mask;
+      z-=OC_ATANH_LOG2[i]+mask^mask;
+      /*Repeat iteration 4.*/
+      if(i>=3)break;
+      z<<=1;
+    }
+    for(;;i++){
+      mask=-(z<0);
+      w+=(w>>i+1)+mask^mask;
+      z-=OC_ATANH_LOG2[i]+mask^mask;
+      /*Repeat iteration 13.*/
+      if(i>=12)break;
+      z<<=1;
+    }
+    for(;i<32;i++){
+      mask=-(z<0);
+      w+=(w>>i+1)+mask^mask;
+      z=z-(OC_ATANH_LOG2[i]+mask^mask)<<1;
+    }
+    wlo=0;
+    /*Skip the remaining iterations unless we really require that much
+       precision.
+      We could have bailed out earlier for smaller iparts, but that would
+       require initializing w from a table, as the limit doesn't converge to
+       61-bit precision until n=30.*/
+    if(ipart>30){
+      /*For these iterations, we just update the low bits, as the high bits
+         can't possibly be affected.
+        OC_ATANH_LOG2 has also converged (it actually did so one iteration
+         earlier, but that's no reason for an extra special case).*/
+      for(;;i++){
+        mask=-(z<0);
+        wlo+=(w>>i)+mask^mask;
+        z-=OC_ATANH_LOG2[31]+mask^mask;
+        /*Repeat iteration 40.*/
+        if(i>=39)break;
+        z<<=1;
+      }
+      for(;i<61;i++){
+        mask=-(z<0);
+        wlo+=(w>>i)+mask^mask;
+        z=z-(OC_ATANH_LOG2[31]+mask^mask)<<1;
+      }
+    }
+    w=(w<<1)+wlo;
+  }
+  else w=(ogg_int64_t)1<<62;
+  if(ipart<62)w=(w>>61-ipart)+1>>1;
+  return w;
+}
+
+/*Computes the binary logarithm of _w, returned in Q57 format.*/
+ogg_int64_t oc_blog64(ogg_int64_t _w){
+  ogg_int64_t z;
+  int         ipart;
+  if(_w<=0)return -1;
+  ipart=OC_ILOGNZ_64(_w)-1;
+  if(ipart>61)_w>>=ipart-61;
+  else _w<<=61-ipart;
+  z=0;
+  if(_w&_w-1){
+    ogg_int64_t x;
+    ogg_int64_t y;
+    ogg_int64_t u;
+    ogg_int64_t mask;
+    int         i;
+    /*C doesn't give us 64x64->128 muls, so we use CORDIC.
+      This is not particularly fast, but it's not being used in time-critical
+       code; it is very accurate.*/
+    /*z is the fractional part of the log in Q61 format.*/
+    /*x and y are the cosh() and sinh(), respectively, in Q61 format.
+      We are computing z=2*atanh(y/x)=2*atanh((_w-1)/(_w+1)).*/
+    x=_w+((ogg_int64_t)1<<61);
+    y=_w-((ogg_int64_t)1<<61);
+    for(i=0;i<4;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*Repeat iteration 4.*/
+    for(i--;i<13;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*Repeat iteration 13.*/
+    for(i--;i<32;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*OC_ATANH_LOG2 has converged.*/
+    for(;i<40;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[31]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*Repeat iteration 40.*/
+    for(i--;i<62;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[31]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    z=z+8>>4;
+  }
+  return ((ogg_int64_t)ipart<<57)+z;
+}

Added: branches/theora-thusnelda/lib/enc/mathops.h
===================================================================
--- branches/theora-thusnelda/lib/enc/mathops.h	                        (rev 0)
+++ branches/theora-thusnelda/lib/enc/mathops.h	2009-03-21 21:11:55 UTC (rev 15825)
@@ -0,0 +1,139 @@
+#if !defined(_mathops_H)
+# define _mathops_H (1)
+# include <ogg/ogg.h>
+
+# ifdef __GNUC_PREREQ
+#  if __GNUC_PREREQ(3,4)
+#   include <limits.h>
+/*Note the casts to (int) below: this prevents OC_CLZ{32|64}_OFFS from
+   "upgrading" the type of an entire expression to an (unsigned) size_t.*/
+#   if INT_MAX>=2147483647
+#    define OC_CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
+#    define OC_CLZ32(_x) (__builtin_clz(_x))
+#   elif LONG_MAX>=2147483647L
+#    define OC_CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
+#    define OC_CLZ32(_x) (__builtin_clzl(_x))
+#   endif
+#   if INT_MAX>=9223372036854775807LL
+#    define OC_CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
+#    define OC_CLZ64(_x) (__builtin_clz(_x))
+#   elif LONG_MAX>=9223372036854775807LL
+#    define OC_CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
+#    define OC_CLZ64(_x) (__builtin_clzl(_x))
+#   elif LLONG_MAX>=9223372036854775807LL|| \
+     __LONG_LONG_MAX__>=9223372036854775807LL
+#    define OC_CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT)
+#    define OC_CLZ64(_x) (__builtin_clzll(_x))
+#   endif
+#  endif
+# endif
+
+
+
+/**
+ * oc_ilog32 - Integer binary logarithm of a 32-bit value.
+ * @_v: A 32-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * The OC_ILOG_32() or OC_ILOGNZ_32() macros may be able to use a builtin
+ *  function instead, which should be faster.
+ */
+int oc_ilog32(ogg_uint32_t _v);
+/**
+ * oc_ilog64 - Integer binary logarithm of a 64-bit value.
+ * @_v: A 64-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * The OC_ILOG_64() or OC_ILOGNZ_64() macros may be able to use a builtin
+ *  function instead, which should be faster.
+ */
+int oc_ilog64(ogg_int64_t _v);
+
+
+# if defined(OC_CLZ32)
+/**
+ * OC_ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value.
+ * @_v: A non-zero 32-bit value.
+ * Returns floor(log2(_v))+1.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * If _v is zero, the return value is undefined; use OC_ILOG_32() instead.
+ */
+#  define OC_ILOGNZ_32(_v) (OC_CLZ32_OFFS-OC_CLZ32(_v))
+/**
+ * OC_ILOG_32 - Integer binary logarithm of a 32-bit value.
+ * @_v: A 32-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ */
+#  define OC_ILOG_32(_v)   (OC_ILOGNZ_32(_v)&-!!(_v))
+# else
+#  define OC_ILOGNZ_32(_v) (oc_ilog32(_v))
+#  define OC_ILOG_32(_v)   (oc_ilog32(_v))
+# endif
+
+# if defined(CLZ64)
+/**
+ * OC_ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value.
+ * @_v: A non-zero 64-bit value.
+ * Returns floor(log2(_v))+1.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * If _v is zero, the return value is undefined; use OC_ILOG_64() instead.
+ */
+#  define OC_ILOGNZ_64(_v) (CLZ64_OFFS-CLZ64(_v))
+/**
+ * OC_ILOG_64 - Integer binary logarithm of a 64-bit value.
+ * @_v: A 64-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ */
+#  define OC_ILOG_64(_v)   (OC_ILOGNZ_64(_v)&-!!(_v))
+# else
+#  define OC_ILOGNZ_64(_v) (oc_ilog64(_v))
+#  define OC_ILOG_64(_v)   (oc_ilog64(_v))
+# endif
+
+# define OC_STATIC_ILOG0(_v) (!!(_v))
+# define OC_STATIC_ILOG1(_v) (((_v)&0x2)?2:OC_STATIC_ILOG0(_v))
+# define OC_STATIC_ILOG2(_v) \
+ (((_v)&0xC)?2+OC_STATIC_ILOG1((_v)>>2):OC_STATIC_ILOG1(_v))
+# define OC_STATIC_ILOG3(_v) \
+ (((_v)&0xF0)?4+OC_STATIC_ILOG2((_v)>>4):OC_STATIC_ILOG2(_v))
+# define OC_STATIC_ILOG4(_v) \
+ (((_v)&0xFF00)?8+OC_STATIC_ILOG3((_v)>>8):OC_STATIC_ILOG3(_v))
+# define OC_STATIC_ILOG5(_v) \
+ (((_v)&0xFFFF0000)?16+OC_STATIC_ILOG4((_v)>>16):OC_STATIC_ILOG4(_v))
+# define OC_STATIC_ILOG6(_v) \
+ (((_v)&0xFFFFFFFF00000000ULL)?32+OC_STATIC_ILOG5((_v)>>32):OC_STATIC_ILOG5(_v))
+/**
+ * OC_STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant.
+ * @_v: A non-negative 32-bit constant.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * This macro is suitable for evaluation at compile time, but it should not be
+ *  used on values that can change at runtime, as it operates via exhaustive
+ *  search.
+ */
+# define OC_STATIC_ILOG_32(_v) (OC_STATIC_ILOG5((ogg_uint32_t)(_v)))
+/**
+ * OC_STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant.
+ * @_v: A non-negative 64-bit constant.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * This macro is suitable for evaluation at compile time, but it should not be
+ *  used on values that can change at runtime, as it operates via exhaustive
+ *  search.
+ */
+# define OC_STATIC_ILOG_64(_v) (OC_STATIC_ILOG6((ogg_int64_t)(_v)))
+
+ogg_int64_t oc_bexp64(ogg_int64_t _z);
+ogg_int64_t oc_blog64(ogg_int64_t _w);
+
+#endif