Updated ft2play, and implemented SSE optimizations in the Lanczos sinc resampler
parent
a8b47ea0ed
commit
7ae9f88e2a
|
@ -1,11 +1,31 @@
|
|||
#ifndef _LANCZOS_RESAMPLER_H_
|
||||
#define _LANCZOS_RESAMPLER_H_
|
||||
|
||||
void lanczos_init();
|
||||
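// Ugly, but it works: when LANCZOS_DECORATE is defined, every public symbol below is renamed with that prefix (presumably to avoid link-time name clashes -- confirm with the build that sets it)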
|
||||
#ifdef LANCZOS_DECORATE
|
||||
#define PASTE(a,b) a ## b
|
||||
#define EVALUATE(a,b) PASTE(a,b)
|
||||
#define lanczos_init EVALUATE(LANCZOS_DECORATE,_lanczos_init)
|
||||
#define lanczos_resampler_create EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_create)
|
||||
#define lanczos_resampler_delete EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_delete)
|
||||
#define lanczos_resampler_dup EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_dup)
|
||||
#define lanczos_resampler_dup_inplace EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_dup_inplace)
|
||||
#define lanczos_resampler_get_free_count EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_get_free_count)
|
||||
#define lanczos_resampler_write_sample EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_write_sample)
|
||||
#define lanczos_resampler_set_rate EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_set_rate)
|
||||
#define lanczos_resampler_ready EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_ready)
|
||||
#define lanczos_resampler_clear EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_clear)
|
||||
#define lanczos_resampler_get_sample_count EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_get_sample_count)
|
||||
#define lanczos_resampler_get_sample EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_get_sample)
|
||||
#define lanczos_resampler_remove_sample EVALUATE(LANCZOS_DECORATE,_lanczos_resampler_remove_sample)
|
||||
#endif
|
||||
|
||||
void * lanczos_resampler_create();
|
||||
void lanczos_init(void);
|
||||
|
||||
void * lanczos_resampler_create(void);
|
||||
void lanczos_resampler_delete(void *);
|
||||
void * lanczos_resampler_dup(void *);
|
||||
void * lanczos_resampler_dup(const void *);
|
||||
void lanczos_resampler_dup_inplace(void *, const void *);
|
||||
|
||||
int lanczos_resampler_get_free_count(void *);
|
||||
void lanczos_resampler_write_sample(void *, short sample);
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
#include <string.h>
|
||||
#define _USE_MATH_DEFINES
|
||||
#include <math.h>
|
||||
#if (defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__amd64__))
|
||||
#include <xmmintrin.h>
|
||||
#define LANCZOS_SSE
|
||||
#endif
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
|
@ -10,29 +14,60 @@
|
|||
#include "internal/lanczos_resampler.h"
|
||||
|
||||
enum { LANCZOS_RESOLUTION = 8192 };
|
||||
enum { LANCZOS_WIDTH = 8 };
|
||||
enum { LANCZOS_WIDTH = 16 };
|
||||
enum { LANCZOS_SAMPLES = LANCZOS_RESOLUTION * LANCZOS_WIDTH };
|
||||
|
||||
static double lanczos_lut[LANCZOS_SAMPLES + 1];
|
||||
static float lanczos_lut[LANCZOS_SAMPLES + 1];
|
||||
|
||||
enum { lanczos_buffer_size = LANCZOS_WIDTH * 4 };
|
||||
|
||||
int fEqual(const double b, const double a)
|
||||
/*
 * Approximate float comparison.
 * Returns 1 when |a - b| is below the absolute tolerance 1e-6, otherwise 0.
 */
static int fEqual(const float b, const float a)
{
    if (fabs(a - b) < 1.0e-6)
        return 1;

    return 0;
}
|
||||
|
||||
static double sinc(double x)
|
||||
static float sinc(float x)
|
||||
{
|
||||
return fEqual(x, 0.0) ? 1.0 : sin(x * M_PI) / (x * M_PI);
|
||||
}
|
||||
|
||||
void lanczos_init()
|
||||
#ifdef LANCZOS_SSE
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#elif defined(__clang__) || defined(__GNUC__)
|
||||
/*
 * CPUID wrapper for GCC/Clang, callable the same way as the __cpuid used on
 * the _MSC_VER branch.
 *
 * data:     receives EAX, EBX, ECX, EDX in data[0]..data[3].
 * selector: CPUID leaf, loaded into EAX before the instruction executes.
 */
static inline void
__cpuid(int *data, int selector)
{
    /*
     * __asm__ instead of asm: the plain keyword is not recognised when the
     * compiler runs in a strict ISO mode such as -std=c99 or -std=c11.
     * ECX is zeroed on entry because some leaves read it as a sub-leaf index.
     */
    __asm__ __volatile__("cpuid"
        : "=a" (data[0]),
          "=b" (data[1]),
          "=c" (data[2]),
          "=d" (data[3])
        : "a" (selector),
          "c" (0));
}
|
||||
#else
|
||||
#define __cpuid(a,b) memset((a), 0, sizeof(int) * 4)
|
||||
#endif
|
||||
|
||||
/*
 * Reports whether the CPU advertises SSE.
 * Returns 1 when bit 25 of EDX from CPUID leaf 1 is set, 0 otherwise.
 */
static int query_cpu_feature_sse(void)
{
    int buffer[4];

    __cpuid(buffer, 1);

    /* buffer[3] holds EDX; bit 25 is the feature flag tested here */
    return (buffer[3] & (1 << 25)) != 0;
}
|
||||
|
||||
static int lanczos_has_sse = 0;
|
||||
#endif
|
||||
|
||||
void lanczos_init(void)
|
||||
{
|
||||
unsigned i;
|
||||
double dx = (double)(LANCZOS_WIDTH) / LANCZOS_SAMPLES, x = 0.0;
|
||||
float dx = (float)(LANCZOS_WIDTH) / LANCZOS_SAMPLES, x = 0.0;
|
||||
for (i = 0; i < LANCZOS_SAMPLES + 1; ++i, x += dx)
|
||||
lanczos_lut[i] = abs(x) < LANCZOS_WIDTH ? sinc(x) * sinc(x / LANCZOS_WIDTH) : 0.0;
|
||||
lanczos_lut[i] = fabs(x) < LANCZOS_WIDTH ? sinc(x) * sinc(x / LANCZOS_WIDTH) : 0.0;
|
||||
#ifdef LANCZOS_SSE
|
||||
lanczos_has_sse = query_cpu_feature_sse();
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef struct lanczos_resampler
|
||||
|
@ -45,7 +80,7 @@ typedef struct lanczos_resampler
|
|||
int buffer_out[lanczos_buffer_size];
|
||||
} lanczos_resampler;
|
||||
|
||||
void * lanczos_resampler_create()
|
||||
void * lanczos_resampler_create(void)
|
||||
{
|
||||
lanczos_resampler * r = ( lanczos_resampler * ) malloc( sizeof(lanczos_resampler) );
|
||||
if ( !r ) return 0;
|
||||
|
@ -67,9 +102,9 @@ void lanczos_resampler_delete(void * _r)
|
|||
free( _r );
|
||||
}
|
||||
|
||||
void * lanczos_resampler_dup(void * _r)
|
||||
void * lanczos_resampler_dup(const void * _r)
|
||||
{
|
||||
lanczos_resampler * r_in = ( lanczos_resampler * ) _r;
|
||||
const lanczos_resampler * r_in = ( const lanczos_resampler * ) _r;
|
||||
lanczos_resampler * r_out = ( lanczos_resampler * ) malloc( sizeof(lanczos_resampler) );
|
||||
if ( !r_out ) return 0;
|
||||
|
||||
|
@ -85,6 +120,21 @@ void * lanczos_resampler_dup(void * _r)
|
|||
return r_out;
|
||||
}
|
||||
|
||||
void lanczos_resampler_dup_inplace(void *_d, const void *_s)
|
||||
{
|
||||
const lanczos_resampler * r_in = ( const lanczos_resampler * ) _s;
|
||||
lanczos_resampler * r_out = ( lanczos_resampler * ) _d;
|
||||
|
||||
r_out->write_pos = r_in->write_pos;
|
||||
r_out->write_filled = r_in->write_filled;
|
||||
r_out->read_pos = r_in->read_pos;
|
||||
r_out->read_filled = r_in->read_filled;
|
||||
r_out->phase = r_in->phase;
|
||||
r_out->phase_inc = r_in->phase_inc;
|
||||
memcpy( r_out->buffer_in, r_in->buffer_in, sizeof(r_in->buffer_in) );
|
||||
memcpy( r_out->buffer_out, r_in->buffer_out, sizeof(r_in->buffer_out) );
|
||||
}
|
||||
|
||||
int lanczos_resampler_get_free_count(void *_r)
|
||||
{
|
||||
lanczos_resampler * r = ( lanczos_resampler * ) _r;
|
||||
|
@ -149,10 +199,10 @@ static int lanczos_resampler_run(lanczos_resampler * r, int ** out_, int * out_e
|
|||
do
|
||||
{
|
||||
// accumulate in extended precision
|
||||
double kernel[LANCZOS_WIDTH * 2], kernel_sum = 0.0;
|
||||
float kernel[LANCZOS_WIDTH * 2], kernel_sum = 0.0;
|
||||
int i = LANCZOS_WIDTH;
|
||||
int phase_adj = phase * step / LANCZOS_RESOLUTION;
|
||||
double sample;
|
||||
float sample;
|
||||
|
||||
if ( out >= out_end )
|
||||
break;
|
||||
|
@ -164,7 +214,7 @@ static int lanczos_resampler_run(lanczos_resampler * r, int ** out_, int * out_e
|
|||
}
|
||||
for (sample = 0, i = 0; i < LANCZOS_WIDTH * 2; ++i)
|
||||
sample += in[i] * kernel[i];
|
||||
*out++ = (int) (sample / kernel_sum * 256.0);
|
||||
*out++ = (int)(sample / kernel_sum * 256.0);
|
||||
|
||||
phase += phase_inc;
|
||||
|
||||
|
@ -174,10 +224,10 @@ static int lanczos_resampler_run(lanczos_resampler * r, int ** out_, int * out_e
|
|||
}
|
||||
while ( in < in_end );
|
||||
|
||||
r->phase = phase;
|
||||
r->phase = (unsigned short) phase;
|
||||
*out_ = out;
|
||||
|
||||
used = in - in_;
|
||||
used = (int)(in - in_);
|
||||
|
||||
r->write_filled -= used;
|
||||
}
|
||||
|
@ -185,6 +235,79 @@ static int lanczos_resampler_run(lanczos_resampler * r, int ** out_, int * out_e
|
|||
return used;
|
||||
}
|
||||
|
||||
#ifdef LANCZOS_SSE
|
||||
/*
 * SSE implementation of the resampling inner loop (integer output).
 *
 * r:       resampler state; reads buffer_in, write_pos, write_filled, phase
 *          and phase_inc, and updates phase and write_filled.
 * out_:    in/out cursor into the output buffer; advanced past every sample
 *          written.
 * out_end: one past the last output slot that may be written.
 *
 * Returns the number of input samples consumed (0 when no more than
 * LANCZOS_WIDTH * 2 input samples are buffered).
 */
static int lanczos_resampler_run_sse(lanczos_resampler * r, int ** out_, int * out_end)
{
    int in_size = r->write_filled;
    /* oldest unread input sample; NOTE(review): relies on buffer_in holding a mirrored second copy -- confirm against lanczos_resampler_write_sample */
    float const* in_ = r->buffer_in + lanczos_buffer_size + r->write_pos - r->write_filled;
    int used = 0;

    /* a full kernel window must remain readable past the current position */
    in_size -= LANCZOS_WIDTH * 2;
    if ( in_size > 0 )
    {
        int* out = *out_;
        float const* in = in_;
        float const* const in_end = in + in_size;
        int phase = r->phase;
        int phase_inc = r->phase_inc;

        /* table stride: shrinks when phase_inc exceeds LANCZOS_RESOLUTION, which widens the kernel */
        int step = phase_inc > LANCZOS_RESOLUTION ? LANCZOS_RESOLUTION * LANCZOS_RESOLUTION / phase_inc : LANCZOS_RESOLUTION;

        do
        {
            /* single-precision accumulation, four taps per SSE register */
            float kernel_sum = 0.0;
            __m128 kernel[LANCZOS_WIDTH / 2];
            __m128 temp1, temp2;
            __m128 samplex = _mm_setzero_ps();
            float *kernelf = (float*)(&kernel);
            int i = LANCZOS_WIDTH;
            int phase_adj = phase * step / LANCZOS_RESOLUTION;

            if ( out >= out_end )
                break;

            /* gather the LANCZOS_WIDTH * 2 kernel taps and their sum */
            for (; i >= -LANCZOS_WIDTH + 1; --i)
            {
                int pos = i * step;
                kernel_sum += kernelf[i + LANCZOS_WIDTH - 1] = lanczos_lut[abs(phase_adj - pos)];
            }

            /* multiply-accumulate four taps at a time; input may be unaligned */
            for (i = 0; i < LANCZOS_WIDTH / 2; ++i)
            {
                temp1 = _mm_loadu_ps( (const float *)( in + i * 4 ) );
                temp2 = _mm_load_ps( (const float *)( kernel + i ) );
                temp1 = _mm_mul_ps( temp1, temp2 );
                samplex = _mm_add_ps( samplex, temp1 );
            }

            /*
             * Normalise by the kernel sum and apply the same 256x gain as the
             * scalar path in lanczos_resampler_run, so both paths produce the
             * same integer output scale.
             */
            kernel_sum = 256.0 / kernel_sum;

            /* horizontal add: fold lanes 2,3 onto 0,1, then lane 1 onto lane 0 */
            temp1 = _mm_movehl_ps( temp1, samplex );
            samplex = _mm_add_ps( samplex, temp1 );
            temp1 = samplex;
            temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) );
            samplex = _mm_add_ps( samplex, temp1 );

            temp1 = _mm_set_ss( kernel_sum );
            samplex = _mm_mul_ps( samplex, temp1 );
            /* converts using the current SSE rounding mode (the scalar path truncates) */
            *out++ = _mm_cvtss_si32( samplex );

            phase += phase_inc;

            /* 13 and 8191 are log2(LANCZOS_RESOLUTION) and LANCZOS_RESOLUTION - 1 */
            in += phase >> 13;

            phase &= 8191;
        }
        while ( in < in_end );

        r->phase = (unsigned short) phase;
        *out_ = out;

        used = (int)(in - in_);

        r->write_filled -= used;
    }

    return used;
}
|
||||
#endif
|
||||
|
||||
static void lanczos_resampler_fill(lanczos_resampler * r)
|
||||
{
|
||||
while ( r->write_filled > (LANCZOS_WIDTH * 2) &&
|
||||
|
@ -195,6 +318,11 @@ static void lanczos_resampler_fill(lanczos_resampler * r)
|
|||
int * out = r->buffer_out + write_pos;
|
||||
if ( write_size > ( lanczos_buffer_size - r->read_filled ) )
|
||||
write_size = lanczos_buffer_size - r->read_filled;
|
||||
#ifdef LANCZOS_SSE
|
||||
if ( lanczos_has_sse )
|
||||
lanczos_resampler_run_sse( r, &out, out + write_size );
|
||||
else
|
||||
#endif
|
||||
lanczos_resampler_run( r, &out, out + write_size );
|
||||
r->read_filled += out - r->buffer_out - write_pos;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
** FT2PLAY v0.34
|
||||
** FT2PLAY v0.35
|
||||
** =============
|
||||
**
|
||||
** C port of FastTracker II's replayer, by 8bitbubsy (Olav Sørensen)
|
||||
|
@ -1467,6 +1467,8 @@ static int16_t RelocateTon(PLAYER *p, int16_t inPeriod, int8_t addNote, StmTyp *
|
|||
outPeriod = (((oldPeriod + addPeriod) >> 1) & 0xFFE0) + fineTune;
|
||||
if (outPeriod < fineTune) outPeriod += (1 << 8);
|
||||
|
||||
if (((outPeriod - 16) >> 1) < ((12 * 10 * 16) + 16)) // non-FT2 security fix
|
||||
{
|
||||
if (inPeriod >= p->Note2Period[(outPeriod - 16) >> 1]) // 16-bit look-up, shift it down
|
||||
{
|
||||
outPeriod -= fineTune;
|
||||
|
@ -1480,13 +1482,13 @@ static int16_t RelocateTon(PLAYER *p, int16_t inPeriod, int8_t addNote, StmTyp *
|
|||
oldPeriod = (int16_t)(outPeriod);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
outPeriod = oldPeriod + fineTune;
|
||||
if (outPeriod < fineTune) outPeriod += (1 << 8);
|
||||
outPeriod += ((int16_t)(addNote) << 5);
|
||||
|
||||
if (outPeriod >= ((((8 * 12 * 16) + 15) * 2) - 1)) outPeriod = ((8 * 12 * 16) + 15) * 2;
|
||||
|
||||
return (p->Note2Period[outPeriod >> 1]); // 16-bit look-up, shift it down
|
||||
}
|
||||
|
||||
|
@ -2260,6 +2262,10 @@ static int8_t LoadInstrHeader(PLAYER *p, MEM *buf, uint16_t i)
|
|||
mread(&ih.Samp[j], 17, 1, buf);
|
||||
mseek(buf, 1 + 22, SEEK_CUR); // skip junk + name
|
||||
memcpy(&p->Instr[i]->Samp[j], &ih.Samp[j], 17);
|
||||
|
||||
// non-FT2 fix: Force loop flags off if loop length is 0
|
||||
if (p->Instr[i]->Samp[j].RepL == 0)
|
||||
p->Instr[i]->Samp[j].Typ &= 0xFC;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2681,13 +2687,6 @@ void voiceSetSource(PLAYER *p, uint8_t i, const int8_t *sampleData,
|
|||
p->voice[i].rampTerminates = 0;
|
||||
#endif
|
||||
|
||||
// for 9xx set offset
|
||||
if (p->voice[i].samplePosition >= p->voice[i].sampleLength)
|
||||
{
|
||||
p->voice[i].sampleData = NULL;
|
||||
p->voice[i].samplePosition = 0;
|
||||
}
|
||||
|
||||
lanczos_resampler_clear(p->resampler[i]);
|
||||
#ifdef USE_VOL_RAMP
|
||||
lanczos_resampler_clear(p->resampler[i+254]);
|
||||
|
@ -2699,6 +2698,12 @@ void voiceSetSource(PLAYER *p, uint8_t i, const int8_t *sampleData,
|
|||
void voiceSetSamplePosition(PLAYER *p, uint8_t i, uint16_t value)
|
||||
{
|
||||
p->voice[i].samplePosition = value;
|
||||
if (p->voice[i].samplePosition >= p->voice[i].sampleLength)
|
||||
{
|
||||
p->voice[i].samplePosition = 0;
|
||||
p->voice[i].sampleData = NULL;
|
||||
}
|
||||
|
||||
p->voice[i].interpolating = 1;
|
||||
p->voice[i].oversampleCount = 0;
|
||||
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
#include <string.h>
|
||||
#define _USE_MATH_DEFINES
|
||||
#include <math.h>
|
||||
#if (defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__amd64__))
|
||||
#include <xmmintrin.h>
|
||||
#define LANCZOS_SSE
|
||||
#endif
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
|
@ -10,29 +14,60 @@
|
|||
#include "lanczos_resampler.h"
|
||||
|
||||
enum { LANCZOS_RESOLUTION = 8192 };
|
||||
enum { LANCZOS_WIDTH = 8 };
|
||||
enum { LANCZOS_WIDTH = 16 };
|
||||
enum { LANCZOS_SAMPLES = LANCZOS_RESOLUTION * LANCZOS_WIDTH };
|
||||
|
||||
static double lanczos_lut[LANCZOS_SAMPLES + 1];
|
||||
static float lanczos_lut[LANCZOS_SAMPLES + 1];
|
||||
|
||||
enum { lanczos_buffer_size = LANCZOS_WIDTH * 4 };
|
||||
|
||||
static int fEqual(const double b, const double a)
|
||||
/*
 * Tolerance-based equality for floats.
 * Yields 1 if the absolute difference of a and b is under 1e-6, else 0.
 */
static int fEqual(const float b, const float a)
{
    int close_enough = 0;

    if (fabs(a - b) < 1.0e-6)
        close_enough = 1;

    return close_enough;
}
|
||||
|
||||
static double sinc(double x)
|
||||
static float sinc(float x)
|
||||
{
|
||||
return fEqual(x, 0.0) ? 1.0 : sin(x * M_PI) / (x * M_PI);
|
||||
}
|
||||
|
||||
#ifdef LANCZOS_SSE
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#elif defined(__clang__) || defined(__GNUC__)
|
||||
/*
 * CPUID wrapper for GCC/Clang, callable the same way as the __cpuid used on
 * the _MSC_VER branch.
 *
 * data:     receives EAX, EBX, ECX, EDX in data[0]..data[3].
 * selector: CPUID leaf, loaded into EAX before the instruction executes.
 */
static inline void
__cpuid(int *data, int selector)
{
    /*
     * __asm__ instead of asm: the plain keyword is not recognised when the
     * compiler runs in a strict ISO mode such as -std=c99 or -std=c11.
     * ECX is zeroed on entry because some leaves read it as a sub-leaf index.
     */
    __asm__ __volatile__("cpuid"
        : "=a" (data[0]),
          "=b" (data[1]),
          "=c" (data[2]),
          "=d" (data[3])
        : "a" (selector),
          "c" (0));
}
|
||||
#else
|
||||
#define __cpuid(a,b) memset((a), 0, sizeof(int) * 4)
|
||||
#endif
|
||||
|
||||
/*
 * Reports whether the CPU advertises SSE.
 * Returns 1 when bit 25 of EDX from CPUID leaf 1 is set, 0 otherwise.
 */
static int query_cpu_feature_sse(void)
{
    int buffer[4];

    __cpuid(buffer, 1);

    /* buffer[3] holds EDX; bit 25 is the feature flag tested here */
    return (buffer[3] & (1 << 25)) != 0;
}
|
||||
|
||||
static int lanczos_has_sse = 0;
|
||||
#endif
|
||||
|
||||
void lanczos_init(void)
|
||||
{
|
||||
unsigned i;
|
||||
double dx = (double)(LANCZOS_WIDTH) / LANCZOS_SAMPLES, x = 0.0;
|
||||
float dx = (float)(LANCZOS_WIDTH) / LANCZOS_SAMPLES, x = 0.0;
|
||||
for (i = 0; i < LANCZOS_SAMPLES + 1; ++i, x += dx)
|
||||
lanczos_lut[i] = fabs(x) < LANCZOS_WIDTH ? sinc(x) * sinc(x / LANCZOS_WIDTH) : 0.0;
|
||||
#ifdef LANCZOS_SSE
|
||||
lanczos_has_sse = query_cpu_feature_sse();
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef struct lanczos_resampler
|
||||
|
@ -164,10 +199,10 @@ static int lanczos_resampler_run(lanczos_resampler * r, float ** out_, float * o
|
|||
do
|
||||
{
|
||||
// accumulate in extended precision
|
||||
double kernel[LANCZOS_WIDTH * 2], kernel_sum = 0.0;
|
||||
float kernel[LANCZOS_WIDTH * 2], kernel_sum = 0.0;
|
||||
int i = LANCZOS_WIDTH;
|
||||
int phase_adj = phase * step / LANCZOS_RESOLUTION;
|
||||
double sample;
|
||||
float sample;
|
||||
|
||||
if ( out >= out_end )
|
||||
break;
|
||||
|
@ -200,6 +235,80 @@ static int lanczos_resampler_run(lanczos_resampler * r, float ** out_, float * o
|
|||
return used;
|
||||
}
|
||||
|
||||
#ifdef LANCZOS_SSE
|
||||
/*
 * SSE implementation of the resampling inner loop (float output).
 *
 * r:       resampler state; reads buffer_in, write_pos, write_filled, phase
 *          and phase_inc, and updates phase and write_filled.
 * out_:    in/out cursor into the output buffer; advanced past every sample
 *          written.
 * out_end: one past the last output slot that may be written.
 *
 * Returns the number of input samples consumed (0 when no more than
 * LANCZOS_WIDTH * 2 input samples are buffered).
 */
static int lanczos_resampler_run_sse(lanczos_resampler * r, float ** out_, float * out_end)
{
    int in_size = r->write_filled;
    /* oldest unread input sample; NOTE(review): relies on buffer_in holding a mirrored second copy -- confirm against lanczos_resampler_write_sample */
    float const* in_ = r->buffer_in + lanczos_buffer_size + r->write_pos - r->write_filled;
    int used = 0;

    /* a full kernel window must remain readable past the current position */
    in_size -= LANCZOS_WIDTH * 2;
    if ( in_size > 0 )
    {
        float* out = *out_;
        float const* in = in_;
        float const* const in_end = in + in_size;
        int phase = r->phase;
        int phase_inc = r->phase_inc;

        /* table stride: shrinks when phase_inc exceeds LANCZOS_RESOLUTION, which widens the kernel */
        int step = phase_inc > LANCZOS_RESOLUTION ? LANCZOS_RESOLUTION * LANCZOS_RESOLUTION / phase_inc : LANCZOS_RESOLUTION;

        do
        {
            /* must start at zero: the tap loop below accumulates into it with += */
            float kernel_sum = 0.0;
            __m128 kernel[LANCZOS_WIDTH / 2];
            __m128 temp1, temp2;
            __m128 samplex = _mm_setzero_ps();
            float *kernelf = (float*)(&kernel);
            int i = LANCZOS_WIDTH;
            int phase_adj = phase * step / LANCZOS_RESOLUTION;

            if ( out >= out_end )
                break;

            /* gather the LANCZOS_WIDTH * 2 kernel taps and their sum */
            for (; i >= -LANCZOS_WIDTH + 1; --i)
            {
                int pos = i * step;
                kernel_sum += kernelf[i + LANCZOS_WIDTH - 1] = lanczos_lut[abs(phase_adj - pos)];
            }

            /* multiply-accumulate four taps at a time; input may be unaligned */
            for (i = 0; i < LANCZOS_WIDTH / 2; ++i)
            {
                temp1 = _mm_loadu_ps( (const float *)( in + i * 4 ) );
                temp2 = _mm_load_ps( (const float *)( kernel + i ) );
                temp1 = _mm_mul_ps( temp1, temp2 );
                samplex = _mm_add_ps( samplex, temp1 );
            }

            /* normalise by the kernel sum and scale the result by 1/32768 */
            kernel_sum = 1.0 / kernel_sum * (1.0 / 32768.0);

            /* horizontal add: fold lanes 2,3 onto 0,1, then lane 1 onto lane 0 */
            temp1 = _mm_movehl_ps( temp1, samplex );
            samplex = _mm_add_ps( samplex, temp1 );
            temp1 = samplex;
            temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) );
            samplex = _mm_add_ps( samplex, temp1 );

            temp1 = _mm_set_ss( kernel_sum );
            samplex = _mm_mul_ps( samplex, temp1 );
            _mm_store_ss( out, samplex );
            ++out;

            phase += phase_inc;

            /* 13 and 8191 are log2(LANCZOS_RESOLUTION) and LANCZOS_RESOLUTION - 1 */
            in += phase >> 13;

            phase &= 8191;
        }
        while ( in < in_end );

        r->phase = (unsigned short) phase;
        *out_ = out;

        used = (int)(in - in_);

        r->write_filled -= used;
    }

    return used;
}
|
||||
#endif
|
||||
|
||||
static void lanczos_resampler_fill(lanczos_resampler * r)
|
||||
{
|
||||
while ( r->write_filled > (LANCZOS_WIDTH * 2) &&
|
||||
|
@ -210,6 +319,11 @@ static void lanczos_resampler_fill(lanczos_resampler * r)
|
|||
float * out = r->buffer_out + write_pos;
|
||||
if ( write_size > ( lanczos_buffer_size - r->read_filled ) )
|
||||
write_size = lanczos_buffer_size - r->read_filled;
|
||||
#ifdef LANCZOS_SSE
|
||||
if ( lanczos_has_sse )
|
||||
lanczos_resampler_run_sse( r, &out, out + write_size );
|
||||
else
|
||||
#endif
|
||||
lanczos_resampler_run( r, &out, out + write_size );
|
||||
r->read_filled += out - r->buffer_out - write_pos;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue