Added automatic detection of SSE support for x86 and force on for x64
parent
fa6b5d4290
commit
a93ec7b95f
|
@ -924,4 +924,10 @@ long _dumb_it_read_sample_data_adpcm4(IT_SAMPLE *sample, DUMBFILE *f);
|
||||||
|
|
||||||
void _dumb_it_interleave_stereo_sample(IT_SAMPLE *sample);
|
void _dumb_it_interleave_stereo_sample(IT_SAMPLE *sample);
|
||||||
|
|
||||||
|
/* Calling either of these is optional */
|
||||||
|
void _dumb_init_cubic();
|
||||||
|
#ifdef _USE_SSE
|
||||||
|
void _dumb_init_sse();
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* INTERNAL_IT_H */
|
#endif /* INTERNAL_IT_H */
|
||||||
|
|
|
@ -65,7 +65,7 @@ long dumb_resample(DUMB_RESAMPLER *resampler, sample_t *dst, long dst_size, VOLU
|
||||||
|
|
||||||
if (VOLUMES_ARE_ZERO) dst = NULL;
|
if (VOLUMES_ARE_ZERO) dst = NULL;
|
||||||
|
|
||||||
init_cubic();
|
_dumb_init_cubic();
|
||||||
|
|
||||||
quality = resampler->quality;
|
quality = resampler->quality;
|
||||||
|
|
||||||
|
@ -382,7 +382,7 @@ void dumb_resample_get_current_sample(DUMB_RESAMPLER *resampler, VOLUME_PARAMETE
|
||||||
|
|
||||||
if (VOLUMES_ARE_ZERO) { MIX_ZEROS(=); return; }
|
if (VOLUMES_ARE_ZERO) { MIX_ZEROS(=); return; }
|
||||||
|
|
||||||
init_cubic();
|
_dumb_init_cubic();
|
||||||
|
|
||||||
quality = resampler->quality;
|
quality = resampler->quality;
|
||||||
|
|
||||||
|
|
|
@ -160,12 +160,11 @@ int dumb_resampling_quality = DUMB_RQ_CUBIC;
|
||||||
|
|
||||||
static short cubicA0[1025], cubicA1[1025];
|
static short cubicA0[1025], cubicA1[1025];
|
||||||
|
|
||||||
/*static*/ void init_cubic(void)
|
void _dumb_init_cubic(void)
|
||||||
{
|
{
|
||||||
unsigned int t; /* 3*1024*1024*1024 is within range if it's unsigned */
|
unsigned int t; /* 3*1024*1024*1024 is within range if it's unsigned */
|
||||||
static int done = 0;
|
static int done = 0;
|
||||||
if (done) return;
|
if (done) return;
|
||||||
done = 1;
|
|
||||||
for (t = 0; t < 1025; t++) {
|
for (t = 0; t < 1025; t++) {
|
||||||
/* int casts to pacify warnings about negating unsigned values */
|
/* int casts to pacify warnings about negating unsigned values */
|
||||||
cubicA0[t] = -(int)( t*t*t >> 17) + (int)( t*t >> 6) - (int)(t << 3);
|
cubicA0[t] = -(int)( t*t*t >> 17) + (int)( t*t >> 6) - (int)(t << 3);
|
||||||
|
@ -173,6 +172,8 @@ static short cubicA0[1025], cubicA1[1025];
|
||||||
}
|
}
|
||||||
|
|
||||||
lanczos_init();
|
lanczos_init();
|
||||||
|
|
||||||
|
done = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include "dumb.h"
|
#include "dumb.h"
|
||||||
#include "internal/dumb.h"
|
#include "internal/dumb.h"
|
||||||
|
@ -786,11 +787,56 @@ static void it_filter_sse(DUMB_CLICK_REMOVER *cr, IT_FILTER_STATE *state, sample
|
||||||
|
|
||||||
#undef LOG10
|
#undef LOG10
|
||||||
|
|
||||||
int _dumb_it_use_sse = 0;
|
#if defined(_M_IX86) || defined(__i386__)
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
static inline void
|
||||||
|
__cpuid(int *data, int selector)
|
||||||
|
{
|
||||||
|
asm("cpuid"
|
||||||
|
: "=a" (data[0]),
|
||||||
|
"=b" (data[1]),
|
||||||
|
"=c" (data[2]),
|
||||||
|
"=d" (data[3])
|
||||||
|
: "a"(selector));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static int query_cpu_feature_sse() {
|
||||||
|
int buffer[4];
|
||||||
|
__cpuid(buffer,1);
|
||||||
|
if ((buffer[3]&(1<<25)) == 0) return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int _dumb_it_use_sse = 0;
|
||||||
|
|
||||||
|
void _dumb_init_sse()
|
||||||
|
{
|
||||||
|
static int initialized = 0;
|
||||||
|
if (!initialized)
|
||||||
|
{
|
||||||
|
_dumb_it_use_sse = query_cpu_feature_sse();
|
||||||
|
initialized = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined(_M_X64) || defined(__amd64__)
|
||||||
|
|
||||||
|
static const int _dumb_it_use_sse = 1;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
static const int _dumb_it_use_sse = 0;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
static void it_filter(DUMB_CLICK_REMOVER *cr, IT_FILTER_STATE *state, sample_t *dst, long pos, sample_t *src, long size, int step, int sampfreq, int cutoff, int resonance)
|
static void it_filter(DUMB_CLICK_REMOVER *cr, IT_FILTER_STATE *state, sample_t *dst, long pos, sample_t *src, long size, int step, int sampfreq, int cutoff, int resonance)
|
||||||
{
|
{
|
||||||
#if defined(_USE_SSE)
|
#if defined(_USE_SSE)
|
||||||
|
_dumb_init_sse();
|
||||||
if ( _dumb_it_use_sse ) it_filter_sse( cr, state, dst, pos, src, size, step, sampfreq, cutoff, resonance );
|
if ( _dumb_it_use_sse ) it_filter_sse( cr, state, dst, pos, src, size, step, sampfreq, cutoff, resonance );
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
@ -938,7 +984,7 @@ static void reset_channel_effects(IT_CHANNEL *channel)
|
||||||
channel->xm_volslide = 0;
|
channel->xm_volslide = 0;
|
||||||
channel->panslide = 0;
|
channel->panslide = 0;
|
||||||
channel->channelvolslide = 0;
|
channel->channelvolslide = 0;
|
||||||
channel->arpeggio_table = &arpeggio_mod;
|
channel->arpeggio_table = (const unsigned char *) &arpeggio_mod;
|
||||||
memset(channel->arpeggio_offsets, 0, sizeof(channel->arpeggio_offsets));
|
memset(channel->arpeggio_offsets, 0, sizeof(channel->arpeggio_offsets));
|
||||||
channel->retrig = 0;
|
channel->retrig = 0;
|
||||||
if (channel->xm_retrig) {
|
if (channel->xm_retrig) {
|
||||||
|
@ -2490,7 +2536,7 @@ Yxy This uses a table 4 times larger (hence 4 times slower) than
|
||||||
channel->arpeggio_offsets[0] = 0;
|
channel->arpeggio_offsets[0] = 0;
|
||||||
channel->arpeggio_offsets[1] = (v & 0xF0) >> 4;
|
channel->arpeggio_offsets[1] = (v & 0xF0) >> 4;
|
||||||
channel->arpeggio_offsets[2] = (v & 0x0F);
|
channel->arpeggio_offsets[2] = (v & 0x0F);
|
||||||
channel->arpeggio_table = ((sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD))==IT_WAS_AN_XM) ? &arpeggio_xm : &arpeggio_mod;
|
channel->arpeggio_table = (const unsigned char *)(((sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD))==IT_WAS_AN_XM) ? &arpeggio_xm : &arpeggio_mod);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case IT_SET_CHANNEL_VOLUME:
|
case IT_SET_CHANNEL_VOLUME:
|
||||||
|
@ -3124,15 +3170,15 @@ Yxy This uses a table 4 times larger (hence 4 times slower) than
|
||||||
switch (entry->effect)
|
switch (entry->effect)
|
||||||
{
|
{
|
||||||
case IT_OKT_ARPEGGIO_3:
|
case IT_OKT_ARPEGGIO_3:
|
||||||
channel->arpeggio_table = &arpeggio_okt_3;
|
channel->arpeggio_table = (const unsigned char *)&arpeggio_okt_3;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case IT_OKT_ARPEGGIO_4:
|
case IT_OKT_ARPEGGIO_4:
|
||||||
channel->arpeggio_table = &arpeggio_okt_4;
|
channel->arpeggio_table = (const unsigned char *)&arpeggio_okt_4;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case IT_OKT_ARPEGGIO_5:
|
case IT_OKT_ARPEGGIO_5:
|
||||||
channel->arpeggio_table = &arpeggio_okt_5;
|
channel->arpeggio_table = (const unsigned char *)&arpeggio_okt_5;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue