[Flac-dev] Fast bit stuff

Sun Jul 22 12:15:06 PDT 2001

Hope this helps. Take the best ideas and forget the rest.
Sorry about the bad documentation.

----------------------------------------------------------------------

/* libFLAC - Free Lossless Audio Codec library
 * Copyright (C) 2001  Josh Coalson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA  02111-1307, USA.
 */

//#define TEST

#ifndef TEST
# include "private/bitmath.h"
# include "FLAC/assert.h"
#else
# define FLAC__ASSERT(x) (void)(x)
#endif

/*
 *  Typedefs for better compatibilty. You get the ability to express what
 *  you really need instead of some blurry and fuzzy words. These are C99
 *  names, so it conflicts with future compilers. May be all this types
 *  should prepended by something like 'flac_' or 'flac__' or 'FLAC__'.
 */

typedef unsigned char       uint8_t;
typedef unsigned short      uint16_t;
typedef unsigned int        uint32_t;
typedef unsigned long long  uint64_t;			// unsigned __int64 for MSC and Intel-C
typedef unsigned int        uint_least8_t;
typedef unsigned int        uint_least16_t;
typedef unsigned int        uint_least32_t;
typedef unsigned long long  uint_least64_t;		// unsigned __int64 for MSC and Intel-C
typedef signed char         int8_t;
typedef signed short        int16_t;
typedef signed int          int32_t;
typedef signed long long    int64_t;			// signed __int64 for MSC and Intel-C
typedef signed int          int_least8_t;
typedef signed int          int_least16_t;
typedef signed int          int_least32_t;
typedef signed long long    int_least64_t;		// signed __int64 for MSC and Intel-C
typedef float               ieee754_float32_t;
typedef double              ieee754_float64_t;
typedef long double         ieee854_float80_t;		// not available for MSC > 4.0, with compiler option for Intel-C
/* typedef unsigned int     size_t; */
/* typedef signed int       ssize_t; */

/*
 *  This is a helper table for fast lookup. It contains 
 *
 *    for  x = 0:        -2
 *    for  0 < x < 256:  floor ( log2 (x) ) 
 *
 */

static const signed char  helper [256] = {
       -2,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
	5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
	6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
	6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};

/*
 *  Calculates 
 *
 *    for  x = 0:          assert ()
 *    for  0 < x < 2^32:   floor ( log2 (x) )
 *
 */

unsigned int  FLAC__bitmath_ilog2_1 ( uint_least32_t v )
{
	unsigned int  l = 0;
	FLAC__ASSERT (v > 0);

	while ( v >>= 1 )
		l++;
	return l;
}

unsigned int  FLAC__bitmath_ilog2_2 ( uint_least32_t v )
{
	unsigned int  l = 0;
	FLAC__ASSERT (v > 0);

        if ( v > 0xFFFF ) l += 16, v >>= 16;
        if ( v > 0x00FF ) l +=  8, v >>=  8;
        if ( v > 0x000F ) l +=  4, v >>=  4;
        if ( v > 0x0003 ) l +=  2, v >>=  2;
        if ( v > 0x0001 ) l +=  1, v >>=  1;
	return l;
}

unsigned int  FLAC__bitmath_ilog2_3 ( uint_least32_t v )
{
	FLAC__ASSERT (v > 0);

	if ( v > 0xFF ) {
		if ( v > 0xFFFF ) {
			if ( v > 0xFFFFFF ) {
				return helper [v >> 24] + 24;
			}
	    		return helper [v >> 16] + 16;
		}
		return helper [v >> 8] + 8;
	}
	return helper[v];
}

/*
 *  The limitation comes from the fact that a double => ieee754_float32_t
 *  convertion can round of numbers with are not depictable by
 *  ieee754_float32_t, for instance (ieee754_float32_t)0x7FFFFFFF = 2147483648.0f
 */

unsigned int  FLAC__bitmath_ilog2_4 ( ieee754_float32_t v )	/* |v| must be smaller than 2^25 */
{
	FLAC__ASSERT (v > 0);
        return ( *(uint32_t*)(&v) >> 23 ) - 127;
}

unsigned int  FLAC__bitmath_ilog2_5 ( ieee754_float64_t v )	/* |v| must be smaller than 2^52 */
{
	FLAC__ASSERT (v > 0);

#ifdef WORDS_BIG_ENDIAN	
        return ( ((uint32_t*)(&v))[0] >> 20 ) - 1023;
#else
        return ( ((uint32_t*)(&v))[1] >> 20 ) - 1023;
#endif	
}

unsigned int  FLAC__bitmath_ilog2_6 ( ieee854_float80_t v )	/* |v| must be smaller than 2^64 */
{
	FLAC__ASSERT (v > 0);

#ifdef WORDS_BIG_ENDIAN	
        return ( ((uint32_t*)(&v))[0] >> 16    ) - 16383;
#else
        return ( ((uint32_t*)(&v))[2] & 0x7FFF ) - 16383;
#endif	
}

/*
 *  Calculates 
 *
 *    for  -2^31 <= x < -1:   2 + floor ( log2 (-1-x) )
 *    for  x = -1:            2
 *    for  x = 0:             0
 *    for  0 < x < 2^31:      2 + floor ( log2 (x) )
 *
 */

unsigned int  FLAC__bitmath_silog2_1 ( int_least32_t v )
{
	while(1) {
		if(v == 0) {
			return 0;
		}
		else if(v > 0) {
			unsigned l = 0;
			while(v) {
				l++;
				v >>= 1;
			}
			return l+1;
		}
		else if(v == -1) {
			return 2;
		}
		else {
			v = -(++v);
		}
	}
}

unsigned int  FLAC__bitmath_silog2_2 ( int_least32_t v )
{
	if ( v < 0 ) {
		v = ~v;
		if ( v == 0 )
			return 2;
	}

	if ( v > 0xFF ) {
		if ( v > 0xFFFF ) {
			if ( v > 0xFFFFFF ) {
				return helper [v >> 24] + 26;
			}
			return helper [v >> 16] + 18;
		}
		return helper [v >> 8] + 10;
	}
	return helper[v] + 2;
}

unsigned int  FLAC__bitmath_silog2_3 ( int_least32_t v )
{
	ieee754_float32_t  x;

	if ( v <= 0 ) {
		if ( v == 0 )
			return 0;
		v = ~v;
		if ( v == 0 )
			return 2;
	}
	x = v;
        return ( *(uint32_t*)(&x) >> 23 ) - 125;
}

unsigned int  FLAC__bitmath_silog2_4 ( ieee754_float32_t v )	/* |v| must be smaller than 2^25 */
{
	if ( v <= 0 ) {
		if ( v == 0 )
			return 0;
		v = -1.0 - v;
		if ( v == 0 )
			return 2;
	}
        return ( *(uint32_t*)(&v) >> 23 ) - 125;
}

unsigned int  FLAC__bitmath_silog2_5 ( ieee754_float64_t v )	/* |v| must be smaller than 2^52 */
{
	if ( v <= 0 ) {
		if ( v == 0 )
			return 0;
		v = -1.0 - v;
		if ( v == 0 )
			return 2;
	}
#ifdef WORDS_BIG_ENDIAN	
        return ( ((uint32_t*)(&v))[0] >> 20 ) - 1021;
#else
        return ( ((uint32_t*)(&v))[1] >> 20 ) - 1021;
#endif	
}

unsigned int  FLAC__bitmath_silog2_6 ( ieee854_float80_t v )	/* |v| must be smaller than 2^64 */
{
	if ( v <= 0 ) {
		if ( v == 0 )
			return 0;
		v = -1.0L - v;
		if ( v == 0 )
			return 2;
	}
#ifdef WORDS_BIG_ENDIAN	
        return ( ((uint32_t*)(&v))[0] >> 16    ) - 16381;
#else
        return ( ((uint32_t*)(&v))[2] & 0x7FFF ) - 16381;
#endif	
}

#ifdef TEST

/*

   3.2 nsec for NOP
 106.9 nsec for FLAC__bitmath_ilog2_1
   8.6 nsec for FLAC__bitmath_ilog2_2
   3.3 nsec for FLAC__bitmath_ilog2_3
   3.3 nsec for FLAC__bitmath_ilog2_4
   3.2 nsec for NOP
 131.8 nsec for FLAC__bitmath_silog2_1
   3.3 nsec for FLAC__bitmath_silog2_2
   3.2 nsec for FLAC__bitmath_silog2_3
   5.0 nsec for FLAC__bitmath_silog2_4

   3.6 nsec for NOP
 105.9 nsec for FLAC__bitmath_ilog2_1
   3.3 nsec for FLAC__bitmath_ilog2_2
   3.4 nsec for FLAC__bitmath_ilog2_3
   3.2 nsec for FLAC__bitmath_ilog2_4
   3.2 nsec for NOP
 127.4 nsec for FLAC__bitmath_silog2_1
   3.2 nsec for FLAC__bitmath_silog2_2
   5.0 nsec for FLAC__bitmath_silog2_3
   4.9 nsec for FLAC__bitmath_silog2_4

   3.3 nsec for NOP
 105.5 nsec for FLAC__bitmath_ilog2_1
   3.2 nsec for FLAC__bitmath_ilog2_2
   3.3 nsec for FLAC__bitmath_ilog2_3
   3.3 nsec for FLAC__bitmath_ilog2_4
   3.3 nsec for NOP
 127.0 nsec for FLAC__bitmath_silog2_1
   3.4 nsec for FLAC__bitmath_silog2_2
   5.0 nsec for FLAC__bitmath_silog2_3
   9.2 nsec for FLAC__bitmath_silog2_4

Note: 
	Times for FLAC__bitmath_ilog2_1 and FLAC__bitmath_silog2_1 are better in reality (residuals are generally small),
	for FLAC__bitmath_ilog2_2 and FLAC__bitmath_silog2_2 are not so good (mispedicted jumps if CPU don't have
	conditional increments and shifts. FLAC__bitmath_silog2_4 is not so good for Intel IA32, Compares are normally slow.

*/                         

#include <stdio.h>
#include <time.h>

static double  gettime ( void )
{
	return clock() * (1./CLOCKS_PER_SEC);
}

int  main ( void )
{
        int     i;
	int     sum = 0;
	double  t;

	for ( i = 1; i <= 256; i++ )
    		printf ("%11d %2u %2u %2u %2u %2u %2u\n", i, FLAC__bitmath_ilog2_1(i), FLAC__bitmath_ilog2_2(i), FLAC__bitmath_ilog2_3(i), FLAC__bitmath_ilog2_4(i), FLAC__bitmath_ilog2_5(i), FLAC__bitmath_ilog2_6(i) );

	for ( i = -256; i <= 256; i++ )
    		printf ("%11d %2u %2u %2u %2u %2u %2u\n", i, FLAC__bitmath_silog2_1(i), FLAC__bitmath_silog2_2(i), FLAC__bitmath_silog2_3(i), FLAC__bitmath_silog2_4(i), FLAC__bitmath_silog2_5(i), FLAC__bitmath_silog2_6(i) );

    	printf ("\n");
    	fflush (stdout);

        t = gettime ();
	for ( i = 1; i <= 100000000; i++ )
		sum += i;
        t = gettime () - t;
	printf ("%6.1f nsec for NOP\n", 10*t );

        t = gettime ();
	for ( i = 1; i <= 100000000; i++ )
		sum += FLAC__bitmath_ilog2_1 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_ilog2_1\n", 10*t );

        t = gettime ();
	for ( i = 1; i <= 100000000; i++ )
		sum += FLAC__bitmath_ilog2_2 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_ilog2_2\n", 10*t );

        t = gettime ();
	for ( i = 1; i <= 100000000; i++ )
		sum += FLAC__bitmath_ilog2_3 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_ilog2_3\n", 10*t );

        t = gettime ();
	for ( i = 1; i <= 100000000; i++ )
		sum += FLAC__bitmath_ilog2_4 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_ilog2_4\n", 10*t );

        t = gettime ();
	for ( i = 1; i <= 100000000; i++ )
		sum += FLAC__bitmath_ilog2_5 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_ilog2_5\n", 10*t );

        t = gettime ();
	for ( i = 1; i <= 100000000; i++ )
		sum += FLAC__bitmath_ilog2_6 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_ilog2_6\n", 10*t );

        t = gettime ();
	for ( i = -50000000; i < 50000000; i++ )
		sum += i;
        t = gettime () - t;
	printf ("%6.1f nsec for NOP\n", 10*t );

        t = gettime ();
	for ( i = -50000000; i < 50000000; i++ )
		sum += FLAC__bitmath_silog2_1 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_silog2_1\n", 10*t );

        t = gettime ();
	for ( i = -50000000; i < 50000000; i++ )
		sum += FLAC__bitmath_silog2_2 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_silog2_2\n", 10*t );

        t = gettime ();
	for ( i = -50000000; i < 50000000; i++ )
		sum += FLAC__bitmath_silog2_3 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_silog2_3\n", 10*t );

        t = gettime ();
	for ( i = -50000000; i < 50000000; i++ )
		sum += FLAC__bitmath_silog2_4 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_silog2_4\n", 10*t );

        t = gettime ();
	for ( i = -50000000; i < 50000000; i++ )
		sum += FLAC__bitmath_silog2_5 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_silog2_5\n", 10*t );

        t = gettime ();
	for ( i = -50000000; i < 50000000; i++ )
		sum += FLAC__bitmath_silog2_6 (i);
        t = gettime () - t;
	printf ("%6.1f nsec for FLAC__bitmath_silog2_6\n", 10*t );

	return 0;
}

#endif