[Flac-dev] Fast bit stuff
Frank Klemm
pfk at fuchs.offl.uni-jena.de
Sun Jul 22 12:15:06 PDT 2001
Hope this helps. Take the best ideas and forget the rest.
Sorry about the bad documentation.
----------------------------------------------------------------------
/* libFLAC - Free Lossless Audio Codec library
* Copyright (C) 2001 Josh Coalson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
//#define TEST
#ifndef TEST
# include "private/bitmath.h"
# include "FLAC/assert.h"
#else
# define FLAC__ASSERT(x) (void)(x)
#endif
/*
* Typedefs for better compatibilty. You get the ability to express what
* you really need instead of some blurry and fuzzy words. These are C99
* names, so it conflicts with future compilers. May be all this types
* should prepended by something like 'flac_' or 'flac__' or 'FLAC__'.
*/
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t; // unsigned __int64 for MSC and Intel-C
typedef unsigned int uint_least8_t;
typedef unsigned int uint_least16_t;
typedef unsigned int uint_least32_t;
typedef unsigned long long uint_least64_t; // unsigned __int64 for MSC and Intel-C
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef signed long long int64_t; // signed __int64 for MSC and Intel-C
typedef signed int int_least8_t;
typedef signed int int_least16_t;
typedef signed int int_least32_t;
typedef signed long long int_least64_t; // signed __int64 for MSC and Intel-C
typedef float ieee754_float32_t;
typedef double ieee754_float64_t;
typedef long double ieee854_float80_t; // not available for MSC > 4.0, with compiler option for Intel-C
/* typedef unsigned int size_t; */
/* typedef signed int ssize_t; */
/*
* This is a helper table for fast lookup. It contains
*
* for x = 0: -2
* for 0 < x < 256: floor ( log2 (x) )
*
*/
static const signed char helper [256] = {
-2,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
/*
* Calculates
*
* for x = 0: assert ()
* for 0 < x < 2^32: floor ( log2 (x) )
*
*/
unsigned int FLAC__bitmath_ilog2_1 ( uint_least32_t v )
{
unsigned int l = 0;
FLAC__ASSERT (v > 0);
while ( v >>= 1 )
l++;
return l;
}
unsigned int FLAC__bitmath_ilog2_2 ( uint_least32_t v )
{
unsigned int l = 0;
FLAC__ASSERT (v > 0);
if ( v > 0xFFFF ) l += 16, v >>= 16;
if ( v > 0x00FF ) l += 8, v >>= 8;
if ( v > 0x000F ) l += 4, v >>= 4;
if ( v > 0x0003 ) l += 2, v >>= 2;
if ( v > 0x0001 ) l += 1, v >>= 1;
return l;
}
unsigned int FLAC__bitmath_ilog2_3 ( uint_least32_t v )
{
FLAC__ASSERT (v > 0);
if ( v > 0xFF ) {
if ( v > 0xFFFF ) {
if ( v > 0xFFFFFF ) {
return helper [v >> 24] + 24;
}
return helper [v >> 16] + 16;
}
return helper [v >> 8] + 8;
}
return helper[v];
}
/*
* The limitation comes from the fact that a double => ieee754_float32_t
* convertion can round of numbers with are not depictable by
* ieee754_float32_t, for instance (ieee754_float32_t)0x7FFFFFFF = 2147483648.0f
*/
unsigned int FLAC__bitmath_ilog2_4 ( ieee754_float32_t v ) /* |v| must be smaller than 2^25 */
{
FLAC__ASSERT (v > 0);
return ( *(uint32_t*)(&v) >> 23 ) - 127;
}
unsigned int FLAC__bitmath_ilog2_5 ( ieee754_float64_t v ) /* |v| must be smaller than 2^52 */
{
FLAC__ASSERT (v > 0);
#ifdef WORDS_BIG_ENDIAN
return ( ((uint32_t*)(&v))[0] >> 20 ) - 1023;
#else
return ( ((uint32_t*)(&v))[1] >> 20 ) - 1023;
#endif
}
unsigned int FLAC__bitmath_ilog2_6 ( ieee854_float80_t v ) /* |v| must be smaller than 2^64 */
{
FLAC__ASSERT (v > 0);
#ifdef WORDS_BIG_ENDIAN
return ( ((uint32_t*)(&v))[0] >> 16 ) - 16383;
#else
return ( ((uint32_t*)(&v))[2] & 0x7FFF ) - 16383;
#endif
}
/*
* Calculates
*
* for -2^31 <= x < -1: 2 + floor ( log2 (-1-x) )
* for x = -1: 2
* for x = 0: 0
* for 0 < x < 2^31: 2 + floor ( log2 (x) )
*
*/
unsigned int FLAC__bitmath_silog2_1 ( int_least32_t v )
{
while(1) {
if(v == 0) {
return 0;
}
else if(v > 0) {
unsigned l = 0;
while(v) {
l++;
v >>= 1;
}
return l+1;
}
else if(v == -1) {
return 2;
}
else {
v = -(++v);
}
}
}
unsigned int FLAC__bitmath_silog2_2 ( int_least32_t v )
{
if ( v < 0 ) {
v = ~v;
if ( v == 0 )
return 2;
}
if ( v > 0xFF ) {
if ( v > 0xFFFF ) {
if ( v > 0xFFFFFF ) {
return helper [v >> 24] + 26;
}
return helper [v >> 16] + 18;
}
return helper [v >> 8] + 10;
}
return helper[v] + 2;
}
unsigned int FLAC__bitmath_silog2_3 ( int_least32_t v )
{
ieee754_float32_t x;
if ( v <= 0 ) {
if ( v == 0 )
return 0;
v = ~v;
if ( v == 0 )
return 2;
}
x = v;
return ( *(uint32_t*)(&x) >> 23 ) - 125;
}
unsigned int FLAC__bitmath_silog2_4 ( ieee754_float32_t v ) /* |v| must be smaller than 2^25 */
{
if ( v <= 0 ) {
if ( v == 0 )
return 0;
v = -1.0 - v;
if ( v == 0 )
return 2;
}
return ( *(uint32_t*)(&v) >> 23 ) - 125;
}
unsigned int FLAC__bitmath_silog2_5 ( ieee754_float64_t v ) /* |v| must be smaller than 2^52 */
{
if ( v <= 0 ) {
if ( v == 0 )
return 0;
v = -1.0 - v;
if ( v == 0 )
return 2;
}
#ifdef WORDS_BIG_ENDIAN
return ( ((uint32_t*)(&v))[0] >> 20 ) - 1021;
#else
return ( ((uint32_t*)(&v))[1] >> 20 ) - 1021;
#endif
}
unsigned int FLAC__bitmath_silog2_6 ( ieee854_float80_t v ) /* |v| must be smaller than 2^64 */
{
if ( v <= 0 ) {
if ( v == 0 )
return 0;
v = -1.0L - v;
if ( v == 0 )
return 2;
}
#ifdef WORDS_BIG_ENDIAN
return ( ((uint32_t*)(&v))[0] >> 16 ) - 16381;
#else
return ( ((uint32_t*)(&v))[2] & 0x7FFF ) - 16381;
#endif
}
#ifdef TEST
/*
3.2 nsec for NOP
106.9 nsec for FLAC__bitmath_ilog2_1
8.6 nsec for FLAC__bitmath_ilog2_2
3.3 nsec for FLAC__bitmath_ilog2_3
3.3 nsec for FLAC__bitmath_ilog2_4
3.2 nsec for NOP
131.8 nsec for FLAC__bitmath_silog2_1
3.3 nsec for FLAC__bitmath_silog2_2
3.2 nsec for FLAC__bitmath_silog2_3
5.0 nsec for FLAC__bitmath_silog2_4
3.6 nsec for NOP
105.9 nsec for FLAC__bitmath_ilog2_1
3.3 nsec for FLAC__bitmath_ilog2_2
3.4 nsec for FLAC__bitmath_ilog2_3
3.2 nsec for FLAC__bitmath_ilog2_4
3.2 nsec for NOP
127.4 nsec for FLAC__bitmath_silog2_1
3.2 nsec for FLAC__bitmath_silog2_2
5.0 nsec for FLAC__bitmath_silog2_3
4.9 nsec for FLAC__bitmath_silog2_4
3.3 nsec for NOP
105.5 nsec for FLAC__bitmath_ilog2_1
3.2 nsec for FLAC__bitmath_ilog2_2
3.3 nsec for FLAC__bitmath_ilog2_3
3.3 nsec for FLAC__bitmath_ilog2_4
3.3 nsec for NOP
127.0 nsec for FLAC__bitmath_silog2_1
3.4 nsec for FLAC__bitmath_silog2_2
5.0 nsec for FLAC__bitmath_silog2_3
9.2 nsec for FLAC__bitmath_silog2_4
Note:
Times for FLAC__bitmath_ilog2_1 and FLAC__bitmath_silog2_1 are better in reality (residuals are generally small),
for FLAC__bitmath_ilog2_2 and FLAC__bitmath_silog2_2 are not so good (mispedicted jumps if CPU don't have
conditional increments and shifts. FLAC__bitmath_silog2_4 is not so good for Intel IA32, Compares are normally slow.
*/
#include <stdio.h>
#include <time.h>
static double gettime ( void )
{
return clock() * (1./CLOCKS_PER_SEC);
}
int main ( void )
{
int i;
int sum = 0;
double t;
for ( i = 1; i <= 256; i++ )
printf ("%11d %2u %2u %2u %2u %2u %2u\n", i, FLAC__bitmath_ilog2_1(i), FLAC__bitmath_ilog2_2(i), FLAC__bitmath_ilog2_3(i), FLAC__bitmath_ilog2_4(i), FLAC__bitmath_ilog2_5(i), FLAC__bitmath_ilog2_6(i) );
for ( i = -256; i <= 256; i++ )
printf ("%11d %2u %2u %2u %2u %2u %2u\n", i, FLAC__bitmath_silog2_1(i), FLAC__bitmath_silog2_2(i), FLAC__bitmath_silog2_3(i), FLAC__bitmath_silog2_4(i), FLAC__bitmath_silog2_5(i), FLAC__bitmath_silog2_6(i) );
printf ("\n");
fflush (stdout);
t = gettime ();
for ( i = 1; i <= 100000000; i++ )
sum += i;
t = gettime () - t;
printf ("%6.1f nsec for NOP\n", 10*t );
t = gettime ();
for ( i = 1; i <= 100000000; i++ )
sum += FLAC__bitmath_ilog2_1 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_ilog2_1\n", 10*t );
t = gettime ();
for ( i = 1; i <= 100000000; i++ )
sum += FLAC__bitmath_ilog2_2 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_ilog2_2\n", 10*t );
t = gettime ();
for ( i = 1; i <= 100000000; i++ )
sum += FLAC__bitmath_ilog2_3 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_ilog2_3\n", 10*t );
t = gettime ();
for ( i = 1; i <= 100000000; i++ )
sum += FLAC__bitmath_ilog2_4 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_ilog2_4\n", 10*t );
t = gettime ();
for ( i = 1; i <= 100000000; i++ )
sum += FLAC__bitmath_ilog2_5 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_ilog2_5\n", 10*t );
t = gettime ();
for ( i = 1; i <= 100000000; i++ )
sum += FLAC__bitmath_ilog2_6 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_ilog2_6\n", 10*t );
t = gettime ();
for ( i = -50000000; i < 50000000; i++ )
sum += i;
t = gettime () - t;
printf ("%6.1f nsec for NOP\n", 10*t );
t = gettime ();
for ( i = -50000000; i < 50000000; i++ )
sum += FLAC__bitmath_silog2_1 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_silog2_1\n", 10*t );
t = gettime ();
for ( i = -50000000; i < 50000000; i++ )
sum += FLAC__bitmath_silog2_2 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_silog2_2\n", 10*t );
t = gettime ();
for ( i = -50000000; i < 50000000; i++ )
sum += FLAC__bitmath_silog2_3 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_silog2_3\n", 10*t );
t = gettime ();
for ( i = -50000000; i < 50000000; i++ )
sum += FLAC__bitmath_silog2_4 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_silog2_4\n", 10*t );
t = gettime ();
for ( i = -50000000; i < 50000000; i++ )
sum += FLAC__bitmath_silog2_5 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_silog2_5\n", 10*t );
t = gettime ();
for ( i = -50000000; i < 50000000; i++ )
sum += FLAC__bitmath_silog2_6 (i);
t = gettime () - t;
printf ("%6.1f nsec for FLAC__bitmath_silog2_6\n", 10*t );
return 0;
}
#endif
More information about the Flac-dev
mailing list