diff --git a/Frameworks/Dumb/dumb/include/dumb.h b/Frameworks/Dumb/dumb/include/dumb.h index 73d51406c..a6d00210d 100644 --- a/Frameworks/Dumb/dumb/include/dumb.h +++ b/Frameworks/Dumb/dumb/include/dumb.h @@ -676,9 +676,10 @@ void dumb_destroy_click_remover_array(int n, DUMB_CLICK_REMOVER **cr); #define DUMB_RQ_ALIASING 0 #define DUMB_RQ_BLEP 1 #define DUMB_RQ_LINEAR 2 -#define DUMB_RQ_CUBIC 3 -#define DUMB_RQ_FIR 4 -#define DUMB_RQ_N_LEVELS 5 +#define DUMB_RQ_BLAM 3 +#define DUMB_RQ_CUBIC 4 +#define DUMB_RQ_FIR 5 +#define DUMB_RQ_N_LEVELS 6 extern int dumb_resampling_quality; /* This specifies the default */ void dumb_it_set_resampling_quality(DUMB_IT_SIGRENDERER * sigrenderer, int quality); /* This overrides it */ @@ -767,6 +768,11 @@ void dumb_resample_get_current_sample_n_2_1(int n, DUMB_RESAMPLER *resampler, DU void dumb_resample_get_current_sample_n_2_2(int n, DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst); void dumb_end_resampler_n(int n, DUMB_RESAMPLER *resampler); +/* This sets the default panning separation for hard panned formats, + or for formats with default panning information. This must be set + before using any readers or loaders, and is not really thread safe. 
*/ + +extern int dumb_it_default_panning_separation; /* in percent, default 25 */ /* DUH Construction */ diff --git a/Frameworks/Dumb/dumb/include/internal/resampler.h b/Frameworks/Dumb/dumb/include/internal/resampler.h index 90a98714f..0050ebf1a 100644 --- a/Frameworks/Dumb/dumb/include/internal/resampler.h +++ b/Frameworks/Dumb/dumb/include/internal/resampler.h @@ -36,9 +36,10 @@ enum RESAMPLER_QUALITY_ZOH = 0, RESAMPLER_QUALITY_BLEP = 1, RESAMPLER_QUALITY_LINEAR = 2, - RESAMPLER_QUALITY_CUBIC = 3, - RESAMPLER_QUALITY_SINC = 4, - RESAMPLER_QUALITY_MAX = 4 + RESAMPLER_QUALITY_BLAM = 3, + RESAMPLER_QUALITY_CUBIC = 4, + RESAMPLER_QUALITY_SINC = 5, + RESAMPLER_QUALITY_MAX = 5 }; void resampler_set_quality(void *, int quality); @@ -52,6 +53,6 @@ void resampler_clear(void *); int resampler_get_sample_count(void *); int resampler_get_sample(void *); float resampler_get_sample_float(void *); -void resampler_remove_sample(void *); +void resampler_remove_sample(void *, int decay); #endif diff --git a/Frameworks/Dumb/dumb/src/helpers/resamp3.inc b/Frameworks/Dumb/dumb/src/helpers/resamp3.inc index 12820f268..c270c83e9 100644 --- a/Frameworks/Dumb/dumb/src/helpers/resamp3.inc +++ b/Frameworks/Dumb/dumb/src/helpers/resamp3.inc @@ -114,8 +114,12 @@ long dumb_resample(DUMB_RESAMPLER *resampler, sample_t *dst, long dst_size, VOLU } x = &src[pos*SRC_CHANNELS]; while ( todo ) { - while ( resampler_get_free_count( resampler->fir_resampler[0] ) && - pos >= resampler->start ) + while ( ( resampler_get_free_count( resampler->fir_resampler[0] ) || + (!resampler_get_sample_count( resampler->fir_resampler[0] ) +#if SRC_CHANNELS == 2 + && !resampler_get_sample_count( resampler->fir_resampler[1] ) +#endif + ) ) && pos >= resampler->start ) { POKE_FIR(0); pos--; @@ -159,8 +163,12 @@ long dumb_resample(DUMB_RESAMPLER *resampler, sample_t *dst, long dst_size, VOLU } x = &src[pos*SRC_CHANNELS]; while ( todo ) { - while ( resampler_get_free_count( resampler->fir_resampler[0] ) && - pos < 
resampler->end ) + while ( ( resampler_get_free_count( resampler->fir_resampler[0] ) || + (!resampler_get_sample_count( resampler->fir_resampler[0] ) +#if SRC_CHANNELS == 2 + && !resampler_get_sample_count( resampler->fir_resampler[1] ) +#endif + ) ) && pos < resampler->end ) { POKE_FIR(0); pos++; diff --git a/Frameworks/Dumb/dumb/src/helpers/resample.c b/Frameworks/Dumb/dumb/src/helpers/resample.c index 987746ee3..4c6852f51 100644 --- a/Frameworks/Dumb/dumb/src/helpers/resample.c +++ b/Frameworks/Dumb/dumb/src/helpers/resample.c @@ -74,13 +74,14 @@ * 0 - DUMB_RQ_ALIASING - fastest * 1 - DUMB_RQ_BLEP - nicer than aliasing, but slower * 2 - DUMB_RQ_LINEAR - * 3 - DUMB_RQ_CUBIC - * 4 - DUMB_RQ_FIR - nicest + * 3 - DUMB_RQ_BLAM - band-limited linear interpolation, nice but slower + * 4 - DUMB_RQ_CUBIC + * 5 - DUMB_RQ_FIR - nicest * - * Values outside the range 0-4 will behave the same as the nearest + * Values outside the range 0-5 will behave the same as the nearest * value within the range. */ -int dumb_resampling_quality = DUMB_RQ_CUBIC; +int dumb_resampling_quality = DUMB_RQ_BLAM; diff --git a/Frameworks/Dumb/dumb/src/helpers/resample.inc b/Frameworks/Dumb/dumb/src/helpers/resample.inc index 7e1e3e2cf..d39de5596 100644 --- a/Frameworks/Dumb/dumb/src/helpers/resample.inc +++ b/Frameworks/Dumb/dumb/src/helpers/resample.inc @@ -141,7 +141,7 @@ DUMB_RESAMPLER *dumb_start_resampler(SRCTYPE *src, int src_channels, long pos, l *dst++ += MULSC( resampler_get_sample( resampler->fir_resampler[0] ), vol ); \ UPDATE_VOLUME( volume, vol ); \ } -#define ADVANCE_FIR resampler_remove_sample( resampler->fir_resampler[0] ) +#define ADVANCE_FIR resampler_remove_sample( resampler->fir_resampler[0], 1 ) #define STEREO_DEST_PEEK_FIR { \ int sample = resampler_get_sample( resampler->fir_resampler[0] ); \ *dst++ = MULSC( sample, lvol ); \ @@ -225,8 +225,8 @@ DUMB_RESAMPLER *dumb_start_resampler(SRCTYPE *src, int src_channels, long pos, l UPDATE_VOLUME( volume_right, rvol ); \ } #define ADVANCE_FIR { \ - resampler_remove_sample( resampler->fir_resampler[0] 
); \ - resampler_remove_sample( resampler->fir_resampler[1] ); \ + resampler_remove_sample( resampler->fir_resampler[0], 1 ); \ + resampler_remove_sample( resampler->fir_resampler[1], 1 ); \ } #define STEREO_DEST_PEEK_FIR { \ *dst++ = MULSC( resampler_get_sample( resampler->fir_resampler[0] ), lvol ); \ diff --git a/Frameworks/Dumb/dumb/src/helpers/resampler.c b/Frameworks/Dumb/dumb/src/helpers/resampler.c index 697261be7..5360b9b35 100644 --- a/Frameworks/Dumb/dumb/src/helpers/resampler.c +++ b/Frameworks/Dumb/dumb/src/helpers/resampler.c @@ -6,6 +6,13 @@ #include <xmmintrin.h> #define RESAMPLER_SSE #endif +#ifdef __APPLE__ +#include <TargetConditionals.h> +#if TARGET_CPU_ARM +#include <arm_neon.h> +#define RESAMPLER_NEON +#endif +#endif #ifdef _MSC_VER #define ALIGNED _declspec(align(16)) @@ -27,6 +34,10 @@ enum { SINC_WIDTH = 16 }; enum { SINC_SAMPLES = RESAMPLER_RESOLUTION * SINC_WIDTH }; enum { CUBIC_SAMPLES = RESAMPLER_RESOLUTION * 4 }; +static const float RESAMPLER_BLEP_CUTOFF = 0.90f; +static const float RESAMPLER_BLAM_CUTOFF = 0.93f; +static const float RESAMPLER_SINC_CUTOFF = 0.999f; + ALIGNED static float cubic_lut[CUBIC_SAMPLES]; static float sinc_lut[SINC_SAMPLES + 1]; @@ -129,10 +140,10 @@ typedef struct resampler { int write_pos, write_filled; int read_pos, read_filled; - unsigned int phase; - unsigned int phase_inc; - unsigned int inv_phase; - unsigned int inv_phase_inc; + float phase; + float phase_inc; + float inv_phase; + float inv_phase_inc; unsigned char quality; signed char delay_added; signed char delay_removed; @@ -173,25 +184,10 @@ void resampler_delete(void * _r) void * resampler_dup(const void * _r) { - const resampler * r_in = ( const resampler * ) _r; - resampler * r_out = ( resampler * ) malloc( sizeof(resampler) ); + void * r_out = malloc( sizeof(resampler) ); if ( !r_out ) return 0; - r_out->write_pos = r_in->write_pos; - r_out->write_filled = r_in->write_filled; - r_out->read_pos = r_in->read_pos; - r_out->read_filled = r_in->read_filled; - r_out->phase = r_in->phase; - 
r_out->phase_inc = r_in->phase_inc; - r_out->inv_phase = r_in->inv_phase; - r_out->inv_phase_inc = r_in->inv_phase_inc; - r_out->quality = r_in->quality; - r_out->delay_added = r_in->delay_added; - r_out->delay_removed = r_in->delay_removed; - r_out->last_amp = r_in->last_amp; - r_out->accumulator = r_in->accumulator; - memcpy( r_out->buffer_in, r_in->buffer_in, sizeof(r_in->buffer_in) ); - memcpy( r_out->buffer_out, r_in->buffer_out, sizeof(r_in->buffer_out) ); + resampler_dup_inplace(r_out, _r); return r_out; } @@ -227,7 +223,8 @@ void resampler_set_quality(void *_r, int quality) quality = RESAMPLER_QUALITY_MAX; if ( r->quality != quality ) { - if ( quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLEP ) + if ( quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLEP || + quality == RESAMPLER_QUALITY_BLAM || r->quality == RESAMPLER_QUALITY_BLAM ) { r->read_pos = 0; r->read_filled = 0; @@ -257,6 +254,7 @@ static int resampler_min_filled(resampler *r) return 1; case RESAMPLER_QUALITY_LINEAR: + case RESAMPLER_QUALITY_BLAM: return 2; case RESAMPLER_QUALITY_CUBIC: @@ -275,6 +273,7 @@ static int resampler_input_delay(resampler *r) case RESAMPLER_QUALITY_ZOH: case RESAMPLER_QUALITY_BLEP: case RESAMPLER_QUALITY_LINEAR: + case RESAMPLER_QUALITY_BLAM: return 0; case RESAMPLER_QUALITY_CUBIC: @@ -297,6 +296,7 @@ static int resampler_output_delay(resampler *r) return 0; case RESAMPLER_QUALITY_BLEP: + case RESAMPLER_QUALITY_BLAM: return SINC_WIDTH - 1; } } @@ -319,16 +319,19 @@ void resampler_clear(void *_r) r->delay_removed = -1; memset(r->buffer_in, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0])); memset(r->buffer_in + resampler_buffer_size, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0])); - if (r->quality == RESAMPLER_QUALITY_BLEP) + if (r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM) + { + r->inv_phase = 0; memset(r->buffer_out, 0, sizeof(r->buffer_out)); + } } void resampler_set_rate(void *_r, double 
new_factor) { resampler * r = ( resampler * ) _r; - r->phase_inc = (int)( new_factor * RESAMPLER_RESOLUTION_EXTRA ); + r->phase_inc = new_factor; new_factor = 1.0 / new_factor; - r->inv_phase_inc = (int)( new_factor * RESAMPLER_RESOLUTION_EXTRA ); + r->inv_phase_inc = new_factor; } void resampler_write_sample(void *_r, short s) @@ -390,8 +393,8 @@ static int resampler_run_zoh(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; do { @@ -405,9 +408,9 @@ static int resampler_run_zoh(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + in += (int)phase; - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -422,6 +425,7 @@ static int resampler_run_zoh(resampler * r, float ** out_, float * out_end) return used; } +#ifndef RESAMPLER_NEON static int resampler_run_blep(resampler * r, float ** out_, float * out_end) { int in_size = r->write_filled; @@ -434,38 +438,45 @@ static int resampler_run_blep(resampler * r, float ** out_, float * out_end) float const* in = in_; float const* const in_end = in + in_size; float last_amp = r->last_amp; - int inv_phase = r->inv_phase; - int inv_phase_inc = r->inv_phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; - const int step = RESAMPLER_RESOLUTION; + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; do { - float kernel[SINC_WIDTH * 2], kernel_sum = 0.0; - int phase_reduced = inv_phase >> RESAMPLER_SHIFT_EXTRA; - int i = SINC_WIDTH; float sample; if ( out + SINC_WIDTH * 2 > out_end ) break; - for (; i >= -SINC_WIDTH + 1; --i) - { - int pos = i * step; - int abs_pos = abs(phase_reduced - pos); - kernel_sum += kernel[i + 
SINC_WIDTH - 1] = sinc_lut[abs_pos] * window_lut[abs_pos]; - } sample = *in++ - last_amp; - last_amp += sample; - sample /= kernel_sum; - for (sample = 0, i = 0; i < SINC_WIDTH * 2; ++i) - out[i] += sample * kernel[i]; + + if (sample) + { + float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f; + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + for (i = 0; i < SINC_WIDTH * 2; ++i) /* sample holds the normalized step; resetting it here would add nothing to out */ + out[i] += sample * kernel[i]; + } inv_phase += inv_phase_inc; - out += inv_phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + out += (int)inv_phase; - inv_phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + inv_phase = fmod(inv_phase, 1.0f); } while ( in < in_end ); @@ -480,6 +491,7 @@ static int resampler_run_blep(resampler * r, float ** out_, float * out_end) return used; } +#endif #ifdef RESAMPLER_SSE static int resampler_run_blep_sse(resampler * r, float ** out_, float * out_end) @@ -494,50 +506,134 @@ static int resampler_run_blep_sse(resampler * r, float ** out_, float * out_end) float const* in = in_; float const* const in_end = in + in_size; float last_amp = r->last_amp; - int inv_phase = r->inv_phase; - int inv_phase_inc = r->inv_phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; - const int step = RESAMPLER_RESOLUTION; + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; do { - // accumulate in extended precision - float kernel_sum = 0.0; - __m128 kernel[SINC_WIDTH / 2]; - __m128 temp1, temp2; - __m128 samplex; float sample; - float *kernelf = (float*)(&kernel); - int phase_reduced = inv_phase >> 
RESAMPLER_SHIFT_EXTRA; - int i = SINC_WIDTH; if ( out + SINC_WIDTH * 2 > out_end ) break; - for (; i >= -SINC_WIDTH + 1; --i) - { - int pos = i * step; - int abs_pos = abs(phase_reduced - pos); - kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs_pos] * window_lut[abs_pos]; - } sample = *in++ - last_amp; - last_amp += sample; - sample /= kernel_sum; - samplex = _mm_set1_ps( sample ); - for (i = 0; i < SINC_WIDTH / 2; ++i) + + if (sample) { - temp1 = _mm_load_ps( (const float *)( kernel + i ) ); - temp1 = _mm_mul_ps( temp1, samplex ); - temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); - temp1 = _mm_add_ps( temp1, temp2 ); - _mm_storeu_ps( (float *) out + i * 4, temp1 ); + float kernel_sum = 0.0f; + __m128 kernel[SINC_WIDTH / 2]; + __m128 temp1, temp2; + __m128 samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = _mm_set1_ps( sample ); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = _mm_load_ps( (const float *)( kernel + i ) ); + temp1 = _mm_mul_ps( temp1, samplex ); + temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); + temp1 = _mm_add_ps( temp1, temp2 ); + _mm_storeu_ps( (float *) out + i * 4, temp1 ); + } } inv_phase += inv_phase_inc; - out += inv_phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + out += (int)inv_phase; - inv_phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + inv_phase = fmod(inv_phase, 1.0f); + } + while ( in < in_end ); + + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int 
resampler_run_blep(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 1; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = *in++ - last_amp; + + if (sample) + { + float kernel_sum = 0.0f; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = vdupq_n_f32(sample); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( kernel + i ) ); + temp2 = vld1q_f32( (const float32_t *) out + i * 4 ); + temp1 = vmlaq_f32( temp2, temp1, samplex ); + vst1q_f32( (float32_t *) out + i * 4, temp1 ); + } + } + + inv_phase += inv_phase_inc; + + out += (int)inv_phase; + + inv_phase = fmod(inv_phase, 1.0f); } while ( in < in_end ); @@ -565,8 +661,8 @@ static int resampler_run_linear(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; do { @@ 
-575,14 +671,14 @@ static int resampler_run_linear(resampler * r, float ** out_, float * out_end) if ( out >= out_end ) break; - sample = in[0] + (in[1] - in[0]) * ((float)phase / RESAMPLER_RESOLUTION_EXTRA); + sample = in[0] + (in[1] - in[0]) * phase; *out++ = sample; phase += phase_inc; - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + in += (int)phase; - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -597,6 +693,287 @@ static int resampler_run_linear(resampler * r, float ** out_, float * out_end) return used; } +#ifndef RESAMPLER_NEON +static int resampler_run_blam(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + sample += (in[1] - in[0]) * phase; + sample -= last_amp; + + if (sample) + { + float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f; + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + for (i = 0; i < SINC_WIDTH * 2; ++i) /* sample holds the normalized step; resetting it here would add nothing to out */ + out[i] += sample * kernel[i]; 
+ } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + in += (int)phase; + phase = fmod(phase, 1.0f); + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_SSE +static int resampler_run_blam_sse(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + { + sample += (in[1] - in[0]) * phase; + } + sample -= last_amp; + + if (sample) + { + float kernel_sum = 0.0f; + __m128 kernel[SINC_WIDTH / 2]; + __m128 temp1, temp2; + __m128 samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = _mm_set1_ps( sample ); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = _mm_load_ps( (const float *)( 
kernel + i ) ); + temp1 = _mm_mul_ps( temp1, samplex ); + temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); + temp1 = _mm_add_ps( temp1, temp2 ); + _mm_storeu_ps( (float *) out + i * 4, temp1 ); + } + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + + if (phase >= 1.0f) + { + ++in; + phase = fmod(phase, 1.0f); + } + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_blam(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + sample += (in[1] - in[0]) * phase; /* interpolate between in[0] and in[1] by the local phase, as the scalar and SSE variants do */ + sample -= last_amp; + + if (sample) + { + float kernel_sum = 0.0; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = 
sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = vdupq_n_f32(sample); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( kernel + i ) ); + temp2 = vld1q_f32( (const float32_t *) out + i * 4 ); + temp1 = vmlaq_f32( temp2, temp1, samplex ); + vst1q_f32( (float32_t *) out + i * 4, temp1 ); + } + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + + if (phase >= 1.0f) + { + ++in; + phase = fmod(phase, 1.0f); + } + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifndef RESAMPLER_NEON static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) { int in_size = r->write_filled; @@ -608,8 +985,8 @@ static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; do { @@ -620,7 +997,7 @@ static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) if ( out >= out_end ) break; - kernel = cubic_lut + (phase >> RESAMPLER_SHIFT_EXTRA) * 4; + kernel = cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4; for (sample = 0, i = 0; i < 4; ++i) sample += in[i] * kernel[i]; @@ -628,63 +1005,9 @@ static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + in += (int)phase; - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; - } - while ( in < in_end ); - - r->phase = phase; - *out_ = out; - - used = (int)(in - in_); - - r->write_filled -= 
used; - } - - return used; -} - -#ifdef RESAMPLER_SSE -static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end) -{ - int in_size = r->write_filled; - float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; - int used = 0; - in_size -= 4; - if ( in_size > 0 ) - { - float* out = *out_; - float const* in = in_; - float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; - - do - { - __m128 temp1, temp2; - __m128 samplex = _mm_setzero_ps(); - - if ( out >= out_end ) - break; - - temp1 = _mm_loadu_ps( (const float *)( in ) ); - temp2 = _mm_load_ps( (const float *)( cubic_lut + (phase >> RESAMPLER_SHIFT_EXTRA) * 4 ) ); - temp1 = _mm_mul_ps( temp1, temp2 ); - samplex = _mm_add_ps( samplex, temp1 ); - temp1 = _mm_movehl_ps( temp1, samplex ); - samplex = _mm_add_ps( samplex, temp1 ); - temp1 = samplex; - temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) ); - samplex = _mm_add_ps( samplex, temp1 ); - _mm_store_ss( out, samplex ); - ++out; - - phase += phase_inc; - - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); - - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -700,6 +1023,111 @@ static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end } #endif +#ifdef RESAMPLER_SSE +static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 4; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + __m128 temp1, temp2; + __m128 samplex = _mm_setzero_ps(); + + if ( out >= out_end ) + break; + + temp1 = _mm_loadu_ps( (const float *)( in ) ); + temp2 = _mm_load_ps( (const float *)( 
cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4 ) ); + temp1 = _mm_mul_ps( temp1, temp2 ); + samplex = _mm_add_ps( samplex, temp1 ); + temp1 = _mm_movehl_ps( temp1, samplex ); + samplex = _mm_add_ps( samplex, temp1 ); + temp1 = samplex; + temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) ); + samplex = _mm_add_ps( samplex, temp1 ); + _mm_store_ss( out, samplex ); + ++out; + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 4; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + float32x4_t temp1, temp2; + float32x2_t half; + + if ( out >= out_end ) + break; + + temp1 = vld1q_f32( (const float32_t *)( in ) ); + temp2 = vld1q_f32( (const float32_t *)( cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4 ) ); + temp1 = vmulq_f32( temp1, temp2 ); + half = vadd_f32(vget_high_f32(temp1), vget_low_f32(temp1)); + *out++ = vget_lane_f32(vpadd_f32(half, half), 0); + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifndef RESAMPLER_NEON static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) { int in_size = r->write_filled; @@ -711,17 +1139,17 @@ static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = 
in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; - int step = phase_inc > RESAMPLER_RESOLUTION_EXTRA ? RESAMPLER_RESOLUTION * RESAMPLER_RESOLUTION_EXTRA / phase_inc : RESAMPLER_RESOLUTION; + int step = phase_inc > 1.0f ? (int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); int window_step = RESAMPLER_RESOLUTION; do { float kernel[SINC_WIDTH * 2], kernel_sum = 0.0; int i = SINC_WIDTH; - int phase_reduced = phase >> RESAMPLER_SHIFT_EXTRA; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; float sample; @@ -740,9 +1168,9 @@ static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + in += (int)phase; - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -756,6 +1184,7 @@ static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) return used; } +#endif #ifdef RESAMPLER_SSE static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) @@ -769,10 +1198,10 @@ static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; - int step = phase_inc > RESAMPLER_RESOLUTION_EXTRA ? RESAMPLER_RESOLUTION * RESAMPLER_RESOLUTION_EXTRA / phase_inc : RESAMPLER_RESOLUTION; + int step = phase_inc > 1.0f ? 
(int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); int window_step = RESAMPLER_RESOLUTION; do @@ -784,7 +1213,7 @@ static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) __m128 samplex = _mm_setzero_ps(); float *kernelf = (float*)(&kernel); int i = SINC_WIDTH; - int phase_reduced = phase >> RESAMPLER_SHIFT_EXTRA; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; if ( out >= out_end ) @@ -816,9 +1245,80 @@ static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + in += (int)phase; - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= SINC_WIDTH * 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + int step = phase_inc > 1.0f ? 
(int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); + int window_step = RESAMPLER_RESOLUTION; + + do + { + // accumulate in extended precision + float kernel_sum = 0.0; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex = {0}; + float32x2_t half; + float *kernelf = (float*)(&kernel); + int i = SINC_WIDTH; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + + if ( out >= out_end ) + break; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( in + i * 4 ) ); + temp2 = vld1q_f32( (const float32_t *)( kernel + i ) ); + samplex = vmlaq_f32( samplex, temp1, temp2 ); + } + kernel_sum = 1.0 / kernel_sum; + samplex = vmulq_f32(samplex, vmovq_n_f32(kernel_sum)); + half = vadd_f32(vget_high_f32(samplex), vget_low_f32(samplex)); + *out++ = vget_lane_f32(vpadd_f32(half, half), 0); + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -861,9 +1361,11 @@ static void resampler_fill(resampler * r) if ( write_extra > SINC_WIDTH * 2 - 1 ) write_extra = SINC_WIDTH * 2 - 1; memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) ); +#ifdef RESAMPLER_SSE if ( resampler_has_sse ) used = resampler_run_blep_sse( r, &out, out + write_size + write_extra ); else +#endif used = resampler_run_blep( r, &out, out + write_size + write_extra ); memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) ); if (!used) @@ -875,6 +1377,27 @@ static void resampler_fill(resampler * r) resampler_run_linear( r, &out, out + write_size ); break; 
+ case RESAMPLER_QUALITY_BLAM: + { + float * out_ = out; + int write_extra = 0; + if ( write_pos >= r->read_pos ) + write_extra = r->read_pos; + if ( write_extra > SINC_WIDTH * 2 - 1 ) + write_extra = SINC_WIDTH * 2 - 1; + memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) ); +#ifdef RESAMPLER_SSE + if ( resampler_has_sse ) + resampler_run_blam_sse( r, &out, out + write_size + write_extra ); + else +#endif + resampler_run_blam( r, &out, out + write_size + write_extra ); + memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) ); + if ( out == out_ ) + return; + break; + } + case RESAMPLER_QUALITY_CUBIC: #ifdef RESAMPLER_SSE if ( resampler_has_sse ) @@ -905,14 +1428,14 @@ static void resampler_fill_and_remove_delay(resampler * r) int delay = resampler_output_delay( r ); r->delay_removed = 0; while ( delay-- ) - resampler_remove_sample( r ); + resampler_remove_sample( r, 1 ); } } int resampler_get_sample_count(void *_r) { resampler * r = ( resampler * ) _r; - if ( r->read_filled < 1 && (r->quality != RESAMPLER_QUALITY_BLEP || r->inv_phase_inc)) + if ( r->read_filled < 1 && ((r->quality != RESAMPLER_QUALITY_BLEP && r->quality != RESAMPLER_QUALITY_BLAM) || r->inv_phase_inc)) resampler_fill_and_remove_delay( r ); return r->read_filled; } @@ -924,7 +1447,7 @@ int resampler_get_sample(void *_r) resampler_fill_and_remove_delay( r ); if ( r->read_filled < 1 ) return 0; - if ( r->quality == RESAMPLER_QUALITY_BLEP ) + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) return (int)(r->buffer_out[ r->read_pos ] + r->accumulator); else return (int)r->buffer_out[ r->read_pos ]; @@ -937,24 +1460,27 @@ float resampler_get_sample_float(void *_r) resampler_fill_and_remove_delay( r ); if ( r->read_filled < 1 ) return 0; - if ( r->quality == RESAMPLER_QUALITY_BLEP ) + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) return 
r->buffer_out[ r->read_pos ] + r->accumulator; else return r->buffer_out[ r->read_pos ]; } -void resampler_remove_sample(void *_r) +void resampler_remove_sample(void *_r, int decay) { resampler * r = ( resampler * ) _r; if ( r->read_filled > 0 ) { - if ( r->quality == RESAMPLER_QUALITY_BLEP ) + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) { r->accumulator += r->buffer_out[ r->read_pos ]; r->buffer_out[ r->read_pos ] = 0; - r->accumulator -= r->accumulator * (1.0 / 8192.0); - if (fabs(r->accumulator) < 1e-20) - r->accumulator = 0; + if (decay) + { + r->accumulator -= r->accumulator * (1.0f / 8192.0f); + if (fabs(r->accumulator) < 1e-20f) + r->accumulator = 0; + } } --r->read_filled; r->read_pos = ( r->read_pos + 1 ) % resampler_buffer_size; diff --git a/Frameworks/Dumb/dumb/src/it/itmisc.c b/Frameworks/Dumb/dumb/src/it/itmisc.c index c891bd457..ea1170e4d 100644 --- a/Frameworks/Dumb/dumb/src/it/itmisc.c +++ b/Frameworks/Dumb/dumb/src/it/itmisc.c @@ -21,6 +21,8 @@ #include "internal/it.h" +int dumb_it_default_panning_separation = 25; + DUMB_IT_SIGDATA *duh_get_it_sigdata(DUH *duh) { diff --git a/Frameworks/Dumb/dumb/src/it/itrender.c b/Frameworks/Dumb/dumb/src/it/itrender.c index bba4e097b..1d9a3353f 100644 --- a/Frameworks/Dumb/dumb/src/it/itrender.c +++ b/Frameworks/Dumb/dumb/src/it/itrender.c @@ -3976,7 +3976,7 @@ static void process_playing(DUMB_IT_SIGRENDERER *sigrenderer, IT_PLAYING *playin playing->sample_vibrato_time += playing->sample->vibrato_speed; } -#ifdef _MSC_VER +#if defined(_MSC_VER) && _MSC_VER < 1800 static float log2(float x) {return (float)log(x)/(float)log(2.0f);} #endif diff --git a/Frameworks/Dumb/dumb/src/it/read669.c b/Frameworks/Dumb/dumb/src/it/read669.c index 4e29afbce..7660073aa 100644 --- a/Frameworks/Dumb/dumb/src/it/read669.c +++ b/Frameworks/Dumb/dumb/src/it/read669.c @@ -413,8 +413,9 @@ static DUMB_IT_SIGDATA *it_669_load_sigdata(DUMBFILE *f, int * ext) memset(sigdata->channel_volume, 64, 
DUMB_IT_N_CHANNELS); for (i = 0; i < DUMB_IT_N_CHANNELS; i += 2) { - sigdata->channel_pan[i+0] = 48; - sigdata->channel_pan[i+1] = 16; + int sep = 32 * dumb_it_default_panning_separation / 100; + sigdata->channel_pan[i+0] = 32 + sep; + sigdata->channel_pan[i+1] = 32 - sep; } _dumb_it_fix_invalid_orders(sigdata); diff --git a/Frameworks/Dumb/dumb/src/it/readam.c b/Frameworks/Dumb/dumb/src/it/readam.c index ac1cc3996..37d62e928 100644 --- a/Frameworks/Dumb/dumb/src/it/readam.c +++ b/Frameworks/Dumb/dumb/src/it/readam.c @@ -347,10 +347,11 @@ static DUMB_IT_SIGDATA *it_riff_amff_load_sigdata( DUMBFILE * f, struct riff * s memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS); for (n = 0; n < DUMB_IT_N_CHANNELS; n += 4) { - sigdata->channel_pan[n ] = 16; - sigdata->channel_pan[n+1] = 48; - sigdata->channel_pan[n+2] = 48; - sigdata->channel_pan[n+3] = 16; + int sep = 32 * dumb_it_default_panning_separation / 100; + sigdata->channel_pan[n ] = 32 - sep; + sigdata->channel_pan[n+1] = 32 + sep; + sigdata->channel_pan[n+2] = 32 + sep; + sigdata->channel_pan[n+3] = 32 - sep; } for ( n = 0; (unsigned)n < stream->chunk_count; ++n ) @@ -578,10 +579,11 @@ static DUMB_IT_SIGDATA *it_riff_am_load_sigdata( DUMBFILE * f, struct riff * str memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS); for (n = 0; n < DUMB_IT_N_CHANNELS; n += 4) { - sigdata->channel_pan[n ] = 16; - sigdata->channel_pan[n+1] = 48; - sigdata->channel_pan[n+2] = 48; - sigdata->channel_pan[n+3] = 16; + int sep = 32 * dumb_it_default_panning_separation / 100; + sigdata->channel_pan[n ] = 32 - sep; + sigdata->channel_pan[n+1] = 32 + sep; + sigdata->channel_pan[n+2] = 32 + sep; + sigdata->channel_pan[n+3] = 32 - sep; } for ( n = 0; (unsigned)n < stream->chunk_count; ++n ) diff --git a/Frameworks/Dumb/dumb/src/it/readamf.c b/Frameworks/Dumb/dumb/src/it/readamf.c index 73176e98f..d79abccf0 100644 --- a/Frameworks/Dumb/dumb/src/it/readamf.c +++ b/Frameworks/Dumb/dumb/src/it/readamf.c @@ -334,8 +334,9 @@ static 
DUMB_IT_SIGDATA *it_amf_load_sigdata(DUMBFILE *f, int * version) } } else { + int sep = 32 * dumb_it_default_panning_separation / 100; for ( i = 0; i < 16; i++ ) { - sigdata->channel_pan[ i ] = ( dumbfile_getc( f ) & 1 ) ? 16 : 48; + sigdata->channel_pan[ i ] = ( dumbfile_getc( f ) & 1 ) ? 32 - sep : 32 + sep; } } diff --git a/Frameworks/Dumb/dumb/src/it/readasy.c b/Frameworks/Dumb/dumb/src/it/readasy.c index 785c4b63f..d32708774 100644 --- a/Frameworks/Dumb/dumb/src/it/readasy.c +++ b/Frameworks/Dumb/dumb/src/it/readasy.c @@ -303,10 +303,11 @@ static DUMB_IT_SIGDATA *it_asy_load_sigdata(DUMBFILE *f) memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS); for (i = 0; i < DUMB_IT_N_CHANNELS; i += 4) { - sigdata->channel_pan[i+0] = 16; - sigdata->channel_pan[i+1] = 48; - sigdata->channel_pan[i+2] = 48; - sigdata->channel_pan[i+3] = 16; + int sep = 32 * dumb_it_default_panning_separation / 100; + sigdata->channel_pan[i+0] = 32 - sep; + sigdata->channel_pan[i+1] = 32 + sep; + sigdata->channel_pan[i+2] = 32 + sep; + sigdata->channel_pan[i+3] = 32 - sep; } _dumb_it_fix_invalid_orders(sigdata); diff --git a/Frameworks/Dumb/dumb/src/it/readdsmf.c b/Frameworks/Dumb/dumb/src/it/readdsmf.c index 0196f3df5..62c74e815 100644 --- a/Frameworks/Dumb/dumb/src/it/readdsmf.c +++ b/Frameworks/Dumb/dumb/src/it/readdsmf.c @@ -272,10 +272,11 @@ static DUMB_IT_SIGDATA *it_riff_dsmf_load_sigdata( DUMBFILE * f, struct riff * s memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS); for (n = 0; n < DUMB_IT_N_CHANNELS; n += 4) { - sigdata->channel_pan[n ] = 16; - sigdata->channel_pan[n+1] = 48; - sigdata->channel_pan[n+2] = 48; - sigdata->channel_pan[n+3] = 16; + int sep = 32 * dumb_it_default_panning_separation / 100; + sigdata->channel_pan[n ] = 32 - sep; + sigdata->channel_pan[n+1] = 32 + sep; + sigdata->channel_pan[n+2] = 32 + sep; + sigdata->channel_pan[n+3] = 32 - sep; } for ( n = 0; (unsigned)n < stream->chunk_count; ++n ) diff --git a/Frameworks/Dumb/dumb/src/it/readmod.c 
b/Frameworks/Dumb/dumb/src/it/readmod.c index 94fb7cc74..61e0d7a29 100644 --- a/Frameworks/Dumb/dumb/src/it/readmod.c +++ b/Frameworks/Dumb/dumb/src/it/readmod.c @@ -596,10 +596,11 @@ static DUMB_IT_SIGDATA *it_mod_load_sigdata(DUMBFILE *f, int restrict_) memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS); for (i = 0; i < DUMB_IT_N_CHANNELS; i += 4) { - sigdata->channel_pan[i+0] = 16; - sigdata->channel_pan[i+1] = 48; - sigdata->channel_pan[i+2] = 48; - sigdata->channel_pan[i+3] = 16; + int sep = 32 * dumb_it_default_panning_separation / 100; + sigdata->channel_pan[i+0] = 32 - sep; + sigdata->channel_pan[i+1] = 32 + sep; + sigdata->channel_pan[i+2] = 32 + sep; + sigdata->channel_pan[i+3] = 32 - sep; } _dumb_it_fix_invalid_orders(sigdata); diff --git a/Frameworks/Dumb/dumb/src/it/readmtm.c b/Frameworks/Dumb/dumb/src/it/readmtm.c index 77f9d9e16..35611388e 100644 --- a/Frameworks/Dumb/dumb/src/it/readmtm.c +++ b/Frameworks/Dumb/dumb/src/it/readmtm.c @@ -229,10 +229,11 @@ static DUMB_IT_SIGDATA *it_mtm_load_sigdata(DUMBFILE *f, int * version) } for (n = 32; n < DUMB_IT_N_CHANNELS; n += 4) { - sigdata->channel_pan[n ] = 16; - sigdata->channel_pan[n+1] = 48; - sigdata->channel_pan[n+2] = 48; - sigdata->channel_pan[n+3] = 16; + int sep = 32 * dumb_it_default_panning_separation / 100; + sigdata->channel_pan[n ] = 32 - sep; + sigdata->channel_pan[n+1] = 32 + sep; + sigdata->channel_pan[n+2] = 32 + sep; + sigdata->channel_pan[n+3] = 32 - sep; } sigdata->sample = malloc(sigdata->n_samples * sizeof(*sigdata->sample)); diff --git a/Frameworks/Dumb/dumb/src/it/readoldpsm.c b/Frameworks/Dumb/dumb/src/it/readoldpsm.c index d4faec147..6000bdb7d 100644 --- a/Frameworks/Dumb/dumb/src/it/readoldpsm.c +++ b/Frameworks/Dumb/dumb/src/it/readoldpsm.c @@ -598,10 +598,11 @@ static DUMB_IT_SIGDATA *it_old_psm_load_sigdata(DUMBFILE *f) memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS); for (n = 0; n < DUMB_IT_N_CHANNELS; n += 4) { - sigdata->channel_pan[n ] = 16; - 
sigdata->channel_pan[n+1] = 48; - sigdata->channel_pan[n+2] = 48; - sigdata->channel_pan[n+3] = 16; + int sep = 32 * dumb_it_default_panning_separation / 100; + sigdata->channel_pan[n ] = 32 - sep; + sigdata->channel_pan[n+1] = 32 + sep; + sigdata->channel_pan[n+2] = 32 + sep; + sigdata->channel_pan[n+3] = 32 - sep; } for (n = 0; n < n_components; n++) diff --git a/Frameworks/Dumb/dumb/src/it/readpsm.c b/Frameworks/Dumb/dumb/src/it/readpsm.c index 99e97eb64..43cceae18 100644 --- a/Frameworks/Dumb/dumb/src/it/readpsm.c +++ b/Frameworks/Dumb/dumb/src/it/readpsm.c @@ -659,10 +659,11 @@ static DUMB_IT_SIGDATA *it_psm_load_sigdata(DUMBFILE *f, int * ver, int subsong) memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS); for (n = 0; n < DUMB_IT_N_CHANNELS; n += 4) { - sigdata->channel_pan[n ] = 16; - sigdata->channel_pan[n+1] = 48; - sigdata->channel_pan[n+2] = 48; - sigdata->channel_pan[n+3] = 16; + int sep = 32 * dumb_it_default_panning_separation / 100; + sigdata->channel_pan[n ] = 32 - sep; + sigdata->channel_pan[n+1] = 32 + sep; + sigdata->channel_pan[n+2] = 32 + sep; + sigdata->channel_pan[n+3] = 32 - sep; } for (n = 0; n < n_song_chunks; n++) { diff --git a/Frameworks/Dumb/dumb/src/it/reads3m.c b/Frameworks/Dumb/dumb/src/it/reads3m.c index 2ee647a4a..a1b1f7f68 100644 --- a/Frameworks/Dumb/dumb/src/it/reads3m.c +++ b/Frameworks/Dumb/dumb/src/it/reads3m.c @@ -557,12 +557,13 @@ static DUMB_IT_SIGDATA *it_s3m_load_sigdata(DUMBFILE *f, int * cwtv) /* Channel settings for 32 channels, 255=unused, +128=disabled */ { int i; + int sep = (7 * dumb_it_default_panning_separation + 50) / 100; for (i = 0; i < 32; i++) { int c = dumbfile_getc(f); if (!(c & (128 | 16))) { /* +128=disabled, +16=Adlib */ if (sigdata->n_pchannels < i + 1) sigdata->n_pchannels = i + 1; sigdata->channel_volume[i] = 64; - sigdata->channel_pan[i] = c & 8 ? 12 : 3; + sigdata->channel_pan[i] = c & 8 ? 7 + sep : 7 - sep; /** WARNING: ah, but it should be 7 for mono... 
*/ } else { /** WARNING: this could be improved if we support channel muting... */ diff --git a/Frameworks/Dumb/dumb/src/it/readstm.c b/Frameworks/Dumb/dumb/src/it/readstm.c index 2cbe715d6..86cd53579 100644 --- a/Frameworks/Dumb/dumb/src/it/readstm.c +++ b/Frameworks/Dumb/dumb/src/it/readstm.c @@ -61,6 +61,7 @@ static int it_stm_read_sample_header( IT_SAMPLE *sample, DUMBFILE *f, unsigned s /* Looks like no-existy. */ sample->flags &= ~IT_SAMPLE_EXISTS; sample->length = 0; + *offset = 0; return dumbfile_error( f ); } @@ -278,10 +279,11 @@ static DUMB_IT_SIGDATA *it_stm_load_sigdata(DUMBFILE *f, int * version) } memset( sigdata->channel_volume, 64, 4 ); - sigdata->channel_pan[ 0 ] = 48; - sigdata->channel_pan[ 1 ] = 16; - sigdata->channel_pan[ 2 ] = 48; - sigdata->channel_pan[ 3 ] = 16; + n = 32 * dumb_it_default_panning_separation / 100; + sigdata->channel_pan[ 0 ] = 32 + n; + sigdata->channel_pan[ 1 ] = 32 - n; + sigdata->channel_pan[ 2 ] = 32 + n; + sigdata->channel_pan[ 3 ] = 32 - n; for ( n = 0; n < sigdata->n_samples; ++n ) { if ( it_stm_read_sample_header( &sigdata->sample[ n ], f, &sample_offset[ n ] ) ) { diff --git a/Frameworks/modplay/modplay/ft2play.c b/Frameworks/modplay/modplay/ft2play.c index 9b6236bf0..41a7539ba 100644 --- a/Frameworks/modplay/modplay/ft2play.c +++ b/Frameworks/modplay/modplay/ft2play.c @@ -3030,7 +3030,8 @@ static inline void mix8b(PLAYER *p, uint32_t ch, uint32_t samples) { samplePosition = v->samplePosition; - while (interpolating && resampler_get_free_count(resampler)) + while (interpolating && (resampler_get_free_count(resampler) || + !resampler_get_sample_count(resampler))) { resampler_write_sample_fixed(resampler, sampleData[samplePosition], 8); @@ -3085,7 +3086,7 @@ static inline void mix8b(PLAYER *p, uint32_t ch, uint32_t samples) } sample = resampler_get_sample_float(resampler); - resampler_remove_sample(resampler); + resampler_remove_sample(resampler, 1); #ifdef USE_VOL_RAMP if (rampStyle > 0) @@ -3193,7 +3194,9 @@ static 
inline void mix8bstereo(PLAYER *p, uint32_t ch, uint32_t samples) { samplePosition = v->samplePosition; - while (interpolating && resampler_get_free_count(resampler[0])) + while (interpolating && (resampler_get_free_count(resampler[0]) || + (!resampler_get_sample_count(resampler[0]) && + !resampler_get_sample_count(resampler[1])))) { resampler_write_sample_fixed(resampler[0], sampleData[samplePosition], 8); resampler_write_sample_fixed(resampler[1], sampleData[sampleLength + samplePosition], 8); @@ -3250,8 +3253,8 @@ static inline void mix8bstereo(PLAYER *p, uint32_t ch, uint32_t samples) sampleL = resampler_get_sample_float(resampler[0]); sampleR = resampler_get_sample_float(resampler[1]); - resampler_remove_sample(resampler[0]); - resampler_remove_sample(resampler[1]); + resampler_remove_sample(resampler[0], 1); + resampler_remove_sample(resampler[1], 1); #ifdef USE_VOL_RAMP if (rampStyle > 0) @@ -3359,7 +3362,8 @@ static inline void mix16b(PLAYER *p, uint32_t ch, uint32_t samples) { samplePosition = v->samplePosition; - while (interpolating && resampler_get_free_count(resampler)) + while (interpolating && (resampler_get_free_count(resampler) || + !resampler_get_sample_count(resampler))) { resampler_write_sample_fixed(resampler, sampleData[samplePosition], 16); @@ -3414,7 +3418,7 @@ static inline void mix16b(PLAYER *p, uint32_t ch, uint32_t samples) } sample = resampler_get_sample_float(resampler); - resampler_remove_sample(resampler); + resampler_remove_sample(resampler, 1); #ifdef USE_VOL_RAMP if (rampStyle > 0) @@ -3522,7 +3526,9 @@ static inline void mix16bstereo(PLAYER *p, uint32_t ch, uint32_t samples) { samplePosition = v->samplePosition; - while (interpolating && resampler_get_free_count(resampler[0])) + while (interpolating && (resampler_get_free_count(resampler[0]) || + (!resampler_get_sample_count(resampler[0]) && + !resampler_get_sample_count(resampler[1])))) { resampler_write_sample_fixed(resampler[0], sampleData[samplePosition], 16); 
resampler_write_sample_fixed(resampler[1], sampleData[sampleLength + samplePosition], 16); @@ -3579,8 +3585,8 @@ static inline void mix16bstereo(PLAYER *p, uint32_t ch, uint32_t samples) sampleL = resampler_get_sample_float(resampler[0]); sampleR = resampler_get_sample_float(resampler[1]); - resampler_remove_sample(resampler[0]); - resampler_remove_sample(resampler[1]); + resampler_remove_sample(resampler[0], 1); + resampler_remove_sample(resampler[1], 1); #ifdef USE_VOL_RAMP if (rampStyle > 0) diff --git a/Frameworks/modplay/modplay/resampler.c b/Frameworks/modplay/modplay/resampler.c index 05a7c2741..0366f38f0 100644 --- a/Frameworks/modplay/modplay/resampler.c +++ b/Frameworks/modplay/modplay/resampler.c @@ -6,6 +6,13 @@ #include #define RESAMPLER_SSE #endif +#ifdef __APPLE__ +#include +#if TARGET_CPU_ARM +#include +#define RESAMPLER_NEON +#endif +#endif #ifdef _MSC_VER #define ALIGNED _declspec(align(16)) @@ -27,6 +34,10 @@ enum { SINC_WIDTH = 16 }; enum { SINC_SAMPLES = RESAMPLER_RESOLUTION * SINC_WIDTH }; enum { CUBIC_SAMPLES = RESAMPLER_RESOLUTION * 4 }; +static const float RESAMPLER_BLEP_CUTOFF = 0.90f; +static const float RESAMPLER_BLAM_CUTOFF = 0.93f; +static const float RESAMPLER_SINC_CUTOFF = 0.999f; + ALIGNED static float cubic_lut[CUBIC_SAMPLES]; static float sinc_lut[SINC_SAMPLES + 1]; @@ -129,10 +140,10 @@ typedef struct resampler { int write_pos, write_filled; int read_pos, read_filled; - unsigned int phase; - unsigned int phase_inc; - unsigned int inv_phase; - unsigned int inv_phase_inc; + float phase; + float phase_inc; + float inv_phase; + float inv_phase_inc; unsigned char quality; signed char delay_added; signed char delay_removed; @@ -173,25 +184,10 @@ void resampler_delete(void * _r) void * resampler_dup(const void * _r) { - const resampler * r_in = ( const resampler * ) _r; - resampler * r_out = ( resampler * ) malloc( sizeof(resampler) ); + void * r_out = malloc( sizeof(resampler) ); if ( !r_out ) return 0; - r_out->write_pos = 
r_in->write_pos; - r_out->write_filled = r_in->write_filled; - r_out->read_pos = r_in->read_pos; - r_out->read_filled = r_in->read_filled; - r_out->phase = r_in->phase; - r_out->phase_inc = r_in->phase_inc; - r_out->inv_phase = r_in->inv_phase; - r_out->inv_phase_inc = r_in->inv_phase_inc; - r_out->quality = r_in->quality; - r_out->delay_added = r_in->delay_added; - r_out->delay_removed = r_in->delay_removed; - r_out->last_amp = r_in->last_amp; - r_out->accumulator = r_in->accumulator; - memcpy( r_out->buffer_in, r_in->buffer_in, sizeof(r_in->buffer_in) ); - memcpy( r_out->buffer_out, r_in->buffer_out, sizeof(r_in->buffer_out) ); + resampler_dup_inplace(r_out, _r); return r_out; } @@ -227,7 +223,8 @@ void resampler_set_quality(void *_r, int quality) quality = RESAMPLER_QUALITY_MAX; if ( r->quality != quality ) { - if ( quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLEP ) + if ( quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLEP || + quality == RESAMPLER_QUALITY_BLAM || r->quality == RESAMPLER_QUALITY_BLAM ) { r->read_pos = 0; r->read_filled = 0; @@ -257,6 +254,7 @@ static int resampler_min_filled(resampler *r) return 1; case RESAMPLER_QUALITY_LINEAR: + case RESAMPLER_QUALITY_BLAM: return 2; case RESAMPLER_QUALITY_CUBIC: @@ -275,6 +273,7 @@ static int resampler_input_delay(resampler *r) case RESAMPLER_QUALITY_ZOH: case RESAMPLER_QUALITY_BLEP: case RESAMPLER_QUALITY_LINEAR: + case RESAMPLER_QUALITY_BLAM: return 0; case RESAMPLER_QUALITY_CUBIC: @@ -297,6 +296,7 @@ static int resampler_output_delay(resampler *r) return 0; case RESAMPLER_QUALITY_BLEP: + case RESAMPLER_QUALITY_BLAM: return SINC_WIDTH - 1; } } @@ -319,16 +319,19 @@ void resampler_clear(void *_r) r->delay_removed = -1; memset(r->buffer_in, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0])); memset(r->buffer_in + resampler_buffer_size, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0])); - if (r->quality == RESAMPLER_QUALITY_BLEP) + if (r->quality == 
RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM) + { + r->inv_phase = 0; memset(r->buffer_out, 0, sizeof(r->buffer_out)); + } } void resampler_set_rate(void *_r, double new_factor) { resampler * r = ( resampler * ) _r; - r->phase_inc = (int)( new_factor * RESAMPLER_RESOLUTION_EXTRA ); + r->phase_inc = new_factor; new_factor = 1.0 / new_factor; - r->inv_phase_inc = (int)( new_factor * RESAMPLER_RESOLUTION_EXTRA ); + r->inv_phase_inc = new_factor; } void resampler_write_sample(void *_r, short s) @@ -390,8 +393,8 @@ static int resampler_run_zoh(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; do { @@ -405,9 +408,9 @@ static int resampler_run_zoh(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + in += (int)phase; - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -422,6 +425,7 @@ static int resampler_run_zoh(resampler * r, float ** out_, float * out_end) return used; } +#ifndef RESAMPLER_NEON static int resampler_run_blep(resampler * r, float ** out_, float * out_end) { int in_size = r->write_filled; @@ -434,38 +438,45 @@ static int resampler_run_blep(resampler * r, float ** out_, float * out_end) float const* in = in_; float const* const in_end = in + in_size; float last_amp = r->last_amp; - int inv_phase = r->inv_phase; - int inv_phase_inc = r->inv_phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; - const int step = RESAMPLER_RESOLUTION; + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; do { - float kernel[SINC_WIDTH * 2], kernel_sum = 0.0; - int phase_reduced = inv_phase >> RESAMPLER_SHIFT_EXTRA; - int i = SINC_WIDTH; float sample; 
if ( out + SINC_WIDTH * 2 > out_end ) break; - for (; i >= -SINC_WIDTH + 1; --i) - { - int pos = i * step; - int abs_pos = abs(phase_reduced - pos); - kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs_pos] * window_lut[abs_pos]; - } sample = *in++ - last_amp; - last_amp += sample; - sample /= kernel_sum; - for (sample = 0, i = 0; i < SINC_WIDTH * 2; ++i) - out[i] += sample * kernel[i]; + + if (sample) + { + float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f; + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + for (i = 0; i < SINC_WIDTH * 2; ++i) + out[i] += sample * kernel[i]; + } inv_phase += inv_phase_inc; - out += inv_phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + out += (int)inv_phase; - inv_phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + inv_phase = fmod(inv_phase, 1.0f); } while ( in < in_end ); @@ -480,6 +491,7 @@ static int resampler_run_blep(resampler * r, float ** out_, float * out_end) return used; } +#endif #ifdef RESAMPLER_SSE static int resampler_run_blep_sse(resampler * r, float ** out_, float * out_end) @@ -494,50 +506,134 @@ static int resampler_run_blep_sse(resampler * r, float ** out_, float * out_end) float const* in = in_; float const* const in_end = in + in_size; float last_amp = r->last_amp; - int inv_phase = r->inv_phase; - int inv_phase_inc = r->inv_phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; - const int step = RESAMPLER_RESOLUTION; + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; do { - // accumulate in extended precision - float kernel_sum = 0.0; -
__m128 kernel[SINC_WIDTH / 2]; - __m128 temp1, temp2; - __m128 samplex; float sample; - float *kernelf = (float*)(&kernel); - int phase_reduced = inv_phase >> RESAMPLER_SHIFT_EXTRA; - int i = SINC_WIDTH; if ( out + SINC_WIDTH * 2 > out_end ) break; - for (; i >= -SINC_WIDTH + 1; --i) - { - int pos = i * step; - int abs_pos = abs(phase_reduced - pos); - kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs_pos] * window_lut[abs_pos]; - } sample = *in++ - last_amp; - last_amp += sample; - sample /= kernel_sum; - samplex = _mm_set1_ps( sample ); - for (i = 0; i < SINC_WIDTH / 2; ++i) + + if (sample) { - temp1 = _mm_load_ps( (const float *)( kernel + i ) ); - temp1 = _mm_mul_ps( temp1, samplex ); - temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); - temp1 = _mm_add_ps( temp1, temp2 ); - _mm_storeu_ps( (float *) out + i * 4, temp1 ); + float kernel_sum = 0.0f; + __m128 kernel[SINC_WIDTH / 2]; + __m128 temp1, temp2; + __m128 samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = _mm_set1_ps( sample ); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = _mm_load_ps( (const float *)( kernel + i ) ); + temp1 = _mm_mul_ps( temp1, samplex ); + temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); + temp1 = _mm_add_ps( temp1, temp2 ); + _mm_storeu_ps( (float *) out + i * 4, temp1 ); + } } inv_phase += inv_phase_inc; - out += inv_phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + out += (int)inv_phase; - inv_phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + inv_phase = fmod(inv_phase, 1.0f); + } + while ( in < in_end ); + + r->inv_phase = inv_phase; + 
r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_blep(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 1; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = *in++ - last_amp; + + if (sample) + { + float kernel_sum = 0.0f; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = vdupq_n_f32(sample); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( kernel + i ) ); + temp2 = vld1q_f32( (const float32_t *) out + i * 4 ); + temp1 = vmlaq_f32( temp2, temp1, samplex ); + vst1q_f32( (float32_t *) out + i * 4, temp1 ); + } + } + + inv_phase += inv_phase_inc; + + out += (int)inv_phase; + + inv_phase = fmod(inv_phase, 1.0f); } while ( in < in_end ); @@ -565,8 +661,8 @@ static int resampler_run_linear(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float 
const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; do { @@ -575,14 +671,14 @@ static int resampler_run_linear(resampler * r, float ** out_, float * out_end) if ( out >= out_end ) break; - sample = in[0] + (in[1] - in[0]) * ((float)phase / RESAMPLER_RESOLUTION_EXTRA); + sample = in[0] + (in[1] - in[0]) * phase; *out++ = sample; phase += phase_inc; - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + in += (int)phase; - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -597,6 +693,287 @@ static int resampler_run_linear(resampler * r, float ** out_, float * out_end) return used; } +#ifndef RESAMPLER_NEON +static int resampler_run_blam(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + sample += (in[1] - in[0]) * phase; + sample -= last_amp; + + if (sample) + { + float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f; + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * 
window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + for (i = 0; i < SINC_WIDTH * 2; ++i) + out[i] += sample * kernel[i]; + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + in += (int)phase; + phase = fmod(phase, 1.0f); + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_SSE +static int resampler_run_blam_sse(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + { + sample += (in[1] - in[0]) * phase; + } + sample -= last_amp; + + if (sample) + { + float kernel_sum = 0.0f; + __m128 kernel[SINC_WIDTH / 2]; + __m128 temp1, temp2; + __m128 samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced -
window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = _mm_set1_ps( sample ); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = _mm_load_ps( (const float *)( kernel + i ) ); + temp1 = _mm_mul_ps( temp1, samplex ); + temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); + temp1 = _mm_add_ps( temp1, temp2 ); + _mm_storeu_ps( (float *) out + i * 4, temp1 ); + } + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + + if (phase >= 1.0f) + { + ++in; + phase = fmod(phase, 1.0f); + } + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_blam(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + sample += (in[1] - in[0]) * fphase; + sample -= last_amp; + + if (sample) + { + float kernel_sum = 0.0; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i 
= SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = vdupq_n_f32(sample); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( kernel + i ) ); + temp2 = vld1q_f32( (const float32_t *) out + i * 4 ); + temp1 = vmlaq_f32( temp2, temp1, samplex ); + vst1q_f32( (float32_t *) out + i * 4, temp1 ); + } + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + + if (phase >= 1.0f) + { + ++in; + phase = fmod(phase, 1.0f); + } + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifndef RESAMPLER_NEON static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) { int in_size = r->write_filled; @@ -608,8 +985,8 @@ static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; do { @@ -620,7 +997,7 @@ static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) if ( out >= out_end ) break; - kernel = cubic_lut + (phase >> RESAMPLER_SHIFT_EXTRA) * 4; + kernel = cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4; for (sample = 0, i = 0; i < 4; ++i) sample += in[i] * kernel[i]; @@ -628,63 +1005,9 @@ static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + in += (int)phase; 
- phase &= RESAMPLER_RESOLUTION_EXTRA - 1; - } - while ( in < in_end ); - - r->phase = phase; - *out_ = out; - - used = (int)(in - in_); - - r->write_filled -= used; - } - - return used; -} - -#ifdef RESAMPLER_SSE -static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end) -{ - int in_size = r->write_filled; - float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; - int used = 0; - in_size -= 4; - if ( in_size > 0 ) - { - float* out = *out_; - float const* in = in_; - float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; - - do - { - __m128 temp1, temp2; - __m128 samplex = _mm_setzero_ps(); - - if ( out >= out_end ) - break; - - temp1 = _mm_loadu_ps( (const float *)( in ) ); - temp2 = _mm_load_ps( (const float *)( cubic_lut + (phase >> RESAMPLER_SHIFT_EXTRA) * 4 ) ); - temp1 = _mm_mul_ps( temp1, temp2 ); - samplex = _mm_add_ps( samplex, temp1 ); - temp1 = _mm_movehl_ps( temp1, samplex ); - samplex = _mm_add_ps( samplex, temp1 ); - temp1 = samplex; - temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) ); - samplex = _mm_add_ps( samplex, temp1 ); - _mm_store_ss( out, samplex ); - ++out; - - phase += phase_inc; - - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); - - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -700,6 +1023,111 @@ static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end } #endif +#ifdef RESAMPLER_SSE +static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 4; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + __m128 temp1, temp2; + __m128 
samplex = _mm_setzero_ps(); + + if ( out >= out_end ) + break; + + temp1 = _mm_loadu_ps( (const float *)( in ) ); + temp2 = _mm_load_ps( (const float *)( cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4 ) ); + temp1 = _mm_mul_ps( temp1, temp2 ); + samplex = _mm_add_ps( samplex, temp1 ); + temp1 = _mm_movehl_ps( temp1, samplex ); + samplex = _mm_add_ps( samplex, temp1 ); + temp1 = samplex; + temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) ); + samplex = _mm_add_ps( samplex, temp1 ); + _mm_store_ss( out, samplex ); + ++out; + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 4; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + float32x4_t temp1, temp2; + float32x2_t half; + + if ( out >= out_end ) + break; + + temp1 = vld1q_f32( (const float32_t *)( in ) ); + temp2 = vld1q_f32( (const float32_t *)( cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4 ) ); + temp1 = vmulq_f32( temp1, temp2 ); + half = vadd_f32(vget_high_f32(temp1), vget_low_f32(temp1)); + *out++ = vget_lane_f32(vpadd_f32(half, half), 0); + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifndef RESAMPLER_NEON static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) { int in_size = r->write_filled; @@ -711,17 
+1139,17 @@ static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; - int step = phase_inc > RESAMPLER_RESOLUTION_EXTRA ? RESAMPLER_RESOLUTION * RESAMPLER_RESOLUTION_EXTRA / phase_inc : RESAMPLER_RESOLUTION; + int step = phase_inc > 1.0f ? (int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); int window_step = RESAMPLER_RESOLUTION; do { float kernel[SINC_WIDTH * 2], kernel_sum = 0.0; int i = SINC_WIDTH; - int phase_reduced = phase >> RESAMPLER_SHIFT_EXTRA; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; float sample; @@ -740,9 +1168,9 @@ static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + in += (int)phase; - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -756,6 +1184,7 @@ static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) return used; } +#endif #ifdef RESAMPLER_SSE static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) @@ -769,10 +1198,10 @@ static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; - int step = phase_inc > RESAMPLER_RESOLUTION_EXTRA ? RESAMPLER_RESOLUTION * RESAMPLER_RESOLUTION_EXTRA / phase_inc : RESAMPLER_RESOLUTION; + int step = phase_inc > 1.0f ? 
(int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); int window_step = RESAMPLER_RESOLUTION; do @@ -784,7 +1213,7 @@ static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) __m128 samplex = _mm_setzero_ps(); float *kernelf = (float*)(&kernel); int i = SINC_WIDTH; - int phase_reduced = phase >> RESAMPLER_SHIFT_EXTRA; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; if ( out >= out_end ) @@ -816,9 +1245,80 @@ static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA); + in += (int)phase; - phase &= RESAMPLER_RESOLUTION_EXTRA - 1; + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= SINC_WIDTH * 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + int step = phase_inc > 1.0f ? 
(int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); + int window_step = RESAMPLER_RESOLUTION; + + do + { + // accumulate in extended precision + float kernel_sum = 0.0; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex = {0}; + float32x2_t half; + float *kernelf = (float*)(&kernel); + int i = SINC_WIDTH; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + + if ( out >= out_end ) + break; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( in + i * 4 ) ); + temp2 = vld1q_f32( (const float32_t *)( kernel + i ) ); + samplex = vmlaq_f32( samplex, temp1, temp2 ); + } + kernel_sum = 1.0 / kernel_sum; + samplex = vmulq_f32(samplex, vmovq_n_f32(kernel_sum)); + half = vadd_f32(vget_high_f32(samplex), vget_low_f32(samplex)); + *out++ = vget_lane_f32(vpadd_f32(half, half), 0); + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -861,9 +1361,11 @@ static void resampler_fill(resampler * r) if ( write_extra > SINC_WIDTH * 2 - 1 ) write_extra = SINC_WIDTH * 2 - 1; memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) ); +#ifdef RESAMPLER_SSE if ( resampler_has_sse ) used = resampler_run_blep_sse( r, &out, out + write_size + write_extra ); else +#endif used = resampler_run_blep( r, &out, out + write_size + write_extra ); memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) ); if (!used) @@ -875,6 +1377,27 @@ static void resampler_fill(resampler * r) resampler_run_linear( r, &out, out + write_size ); break; 
+ case RESAMPLER_QUALITY_BLAM: + { + float * out_ = out; + int write_extra = 0; + if ( write_pos >= r->read_pos ) + write_extra = r->read_pos; + if ( write_extra > SINC_WIDTH * 2 - 1 ) + write_extra = SINC_WIDTH * 2 - 1; + memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) ); +#ifdef RESAMPLER_SSE + if ( resampler_has_sse ) + resampler_run_blam_sse( r, &out, out + write_size + write_extra ); + else +#endif + resampler_run_blam( r, &out, out + write_size + write_extra ); + memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) ); + if ( out == out_ ) + return; + break; + } + case RESAMPLER_QUALITY_CUBIC: #ifdef RESAMPLER_SSE if ( resampler_has_sse ) @@ -905,14 +1428,14 @@ static void resampler_fill_and_remove_delay(resampler * r) int delay = resampler_output_delay( r ); r->delay_removed = 0; while ( delay-- ) - resampler_remove_sample( r ); + resampler_remove_sample( r, 1 ); } } int resampler_get_sample_count(void *_r) { resampler * r = ( resampler * ) _r; - if ( r->read_filled < 1 && (r->quality != RESAMPLER_QUALITY_BLEP || r->inv_phase_inc)) + if ( r->read_filled < 1 && ((r->quality != RESAMPLER_QUALITY_BLEP && r->quality != RESAMPLER_QUALITY_BLAM) || r->inv_phase_inc)) resampler_fill_and_remove_delay( r ); return r->read_filled; } @@ -924,7 +1447,7 @@ int resampler_get_sample(void *_r) resampler_fill_and_remove_delay( r ); if ( r->read_filled < 1 ) return 0; - if ( r->quality == RESAMPLER_QUALITY_BLEP ) + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) return (int)(r->buffer_out[ r->read_pos ] + r->accumulator); else return (int)r->buffer_out[ r->read_pos ]; @@ -937,24 +1460,27 @@ float resampler_get_sample_float(void *_r) resampler_fill_and_remove_delay( r ); if ( r->read_filled < 1 ) return 0; - if ( r->quality == RESAMPLER_QUALITY_BLEP ) + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) return 
r->buffer_out[ r->read_pos ] + r->accumulator; else return r->buffer_out[ r->read_pos ]; } -void resampler_remove_sample(void *_r) +void resampler_remove_sample(void *_r, int decay) { resampler * r = ( resampler * ) _r; if ( r->read_filled > 0 ) { - if ( r->quality == RESAMPLER_QUALITY_BLEP ) + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) { r->accumulator += r->buffer_out[ r->read_pos ]; r->buffer_out[ r->read_pos ] = 0; - r->accumulator -= r->accumulator * (1.0 / 8192.0); - if (fabs(r->accumulator) < 1e-20) - r->accumulator = 0; + if (decay) + { + r->accumulator -= r->accumulator * (1.0f / 8192.0f); + if (fabs(r->accumulator) < 1e-20f) + r->accumulator = 0; + } } --r->read_filled; r->read_pos = ( r->read_pos + 1 ) % resampler_buffer_size; diff --git a/Frameworks/modplay/modplay/resampler.h b/Frameworks/modplay/modplay/resampler.h index 90a98714f..0050ebf1a 100644 --- a/Frameworks/modplay/modplay/resampler.h +++ b/Frameworks/modplay/modplay/resampler.h @@ -36,9 +36,10 @@ enum RESAMPLER_QUALITY_ZOH = 0, RESAMPLER_QUALITY_BLEP = 1, RESAMPLER_QUALITY_LINEAR = 2, - RESAMPLER_QUALITY_CUBIC = 3, - RESAMPLER_QUALITY_SINC = 4, - RESAMPLER_QUALITY_MAX = 4 + RESAMPLER_QUALITY_BLAM = 3, + RESAMPLER_QUALITY_CUBIC = 4, + RESAMPLER_QUALITY_SINC = 5, + RESAMPLER_QUALITY_MAX = 5 }; void resampler_set_quality(void *, int quality); @@ -52,6 +53,6 @@ void resampler_clear(void *); int resampler_get_sample_count(void *); int resampler_get_sample(void *); float resampler_get_sample_float(void *); -void resampler_remove_sample(void *); +void resampler_remove_sample(void *, int decay); #endif diff --git a/Frameworks/modplay/modplay/st3play.c b/Frameworks/modplay/modplay/st3play.c index 53e62454f..2df4c86d2 100644 --- a/Frameworks/modplay/modplay/st3play.c +++ b/Frameworks/modplay/modplay/st3play.c @@ -2947,7 +2947,8 @@ static inline void mix8b(PLAYER *p, uint8_t ch, uint32_t samples) { samplePosition = v->samplePosition; - while (interpolating && 
resampler_get_free_count(resampler)) + while (interpolating && (resampler_get_free_count(resampler) || + !resampler_get_sample_count(resampler))) { resampler_write_sample_fixed(resampler, sampleData[samplePosition], 8); @@ -2976,7 +2977,7 @@ static inline void mix8b(PLAYER *p, uint8_t ch, uint32_t samples) } sample = resampler_get_sample_float(resampler); - resampler_remove_sample(resampler); + resampler_remove_sample(resampler, 1); #ifdef USE_VOL_RAMP if (rampStyle > 0) @@ -2991,7 +2992,8 @@ static inline void mix8b(PLAYER *p, uint8_t ch, uint32_t samples) { v->fader = v->faderDest; resampler_clear(resampler); - p->voice[ch].mixing = 0; + v->mixing = 0; + sampleData = 0; } sample *= v->fader; @@ -3099,7 +3101,9 @@ static inline void mix8bstereo(PLAYER *p, uint8_t ch, uint32_t samples) { samplePosition = v->samplePosition; - while (interpolating && resampler_get_free_count(resampler[0])) + while (interpolating && (resampler_get_free_count(resampler[0]) || + (!resampler_get_sample_count(resampler[0]) && + !resampler_get_sample_count(resampler[1])))) { resampler_write_sample_fixed(resampler[0], sampleData[samplePosition], 8); resampler_write_sample_fixed(resampler[1], sampleData[sampleLength + samplePosition], 8); @@ -3131,8 +3135,8 @@ static inline void mix8bstereo(PLAYER *p, uint8_t ch, uint32_t samples) sampleL = resampler_get_sample_float(resampler[0]); sampleR = resampler_get_sample_float(resampler[1]); - resampler_remove_sample(resampler[0]); - resampler_remove_sample(resampler[1]); + resampler_remove_sample(resampler[0], 1); + resampler_remove_sample(resampler[1], 1); #ifdef USE_VOL_RAMP if (rampStyle > 0) @@ -3148,6 +3152,7 @@ static inline void mix8bstereo(PLAYER *p, uint8_t ch, uint32_t samples) v->fader = v->faderDest; resampler_clear(resampler); v->mixing = 0; + sampleData = 0; } sampleL *= v->fader; @@ -3250,7 +3255,8 @@ static inline void mix16b(PLAYER *p, uint8_t ch, uint32_t samples) { samplePosition = v->samplePosition; - while (interpolating && 
resampler_get_free_count(resampler)) + while (interpolating && (resampler_get_free_count(resampler) || + !resampler_get_sample_count(resampler))) { resampler_write_sample_fixed(resampler, (int16_t)get_le16(&sampleData[samplePosition]), 16); @@ -3279,7 +3285,7 @@ static inline void mix16b(PLAYER *p, uint8_t ch, uint32_t samples) } sample = resampler_get_sample_float(resampler); - resampler_remove_sample(resampler); + resampler_remove_sample(resampler, 1); #ifdef USE_VOL_RAMP if (rampStyle > 0) @@ -3295,6 +3301,7 @@ static inline void mix16b(PLAYER *p, uint8_t ch, uint32_t samples) v->fader = v->faderDest; resampler_clear(resampler); v->mixing = 0; + sampleData = 0; } sample *= v->fader; @@ -3402,7 +3409,9 @@ static inline void mix16bstereo(PLAYER *p, uint8_t ch, uint32_t samples) { samplePosition = v->samplePosition; - while (interpolating && resampler_get_free_count(resampler[0])) + while (interpolating && (resampler_get_free_count(resampler[0]) || + (!resampler_get_sample_count(resampler[0]) && + !resampler_get_sample_count(resampler[1])))) { resampler_write_sample_fixed(resampler[0], (int16_t)get_le16(&sampleData[samplePosition]), 16); resampler_write_sample_fixed(resampler[1], (int16_t)get_le16(&sampleData[sampleLength + samplePosition]), 16); @@ -3434,8 +3443,8 @@ static inline void mix16bstereo(PLAYER *p, uint8_t ch, uint32_t samples) sampleL = resampler_get_sample_float(resampler[0]); sampleR = resampler_get_sample_float(resampler[1]); - resampler_remove_sample(resampler[0]); - resampler_remove_sample(resampler[1]); + resampler_remove_sample(resampler[0], 1); + resampler_remove_sample(resampler[1], 1); #ifdef USE_VOL_RAMP if (rampStyle > 0) @@ -3451,6 +3460,7 @@ static inline void mix16bstereo(PLAYER *p, uint8_t ch, uint32_t samples) v->fader = v->faderDest; resampler_clear(resampler); v->mixing = 0; + sampleData = 0; } sampleL *= v->fader; @@ -3572,7 +3582,8 @@ static inline void mixadpcm(PLAYER *p, uint8_t ch, uint32_t samples) { samplePosition = 
v->samplePosition; - while (interpolating && resampler_get_free_count(resampler)) + while (interpolating && (resampler_get_free_count(resampler) || + !resampler_get_sample_count(resampler))) { int8_t nextDelta = lastDelta; int16_t sample = get_adpcm_sample(sampleDictionary, sampleData, samplePosition, &nextDelta); @@ -3614,7 +3625,7 @@ static inline void mixadpcm(PLAYER *p, uint8_t ch, uint32_t samples) } sample = resampler_get_sample_float(resampler); - resampler_remove_sample(resampler); + resampler_remove_sample(resampler, 1); #ifdef USE_VOL_RAMP if (rampStyle > 0) @@ -3630,6 +3641,7 @@ static inline void mixadpcm(PLAYER *p, uint8_t ch, uint32_t samples) v->fader = v->faderDest; resampler_clear(resampler); v->mixing = 0; + sampleData = 0; } sample *= v->fader; @@ -3773,7 +3785,7 @@ static void st3play_AdlibMix(PLAYER *p, float *buffer, int32_t count) for (i = 0; i < outbuffer_avail; ++i) { float sample = resampler_get_sample_float( p->fmResampler ); - resampler_remove_sample( p->fmResampler ); + resampler_remove_sample( p->fmResampler, 1 ); buffer[i * 2 + 0] += sample; buffer[i * 2 + 1] += sample; diff --git a/Frameworks/playptmod/playptmod.xcodeproj/project.pbxproj b/Frameworks/playptmod/playptmod.xcodeproj/project.pbxproj index 9022d9648..c73b5c2e1 100644 --- a/Frameworks/playptmod/playptmod.xcodeproj/project.pbxproj +++ b/Frameworks/playptmod/playptmod.xcodeproj/project.pbxproj @@ -7,16 +7,16 @@ objects = { /* Begin PBXBuildFile section */ - 83304C9B1A5F9A1C0066CDDA /* pt_blep.c in Sources */ = {isa = PBXBuildFile; fileRef = 83304C991A5F9A1C0066CDDA /* pt_blep.c */; }; - 83304C9C1A5F9A1C0066CDDA /* pt_blep.h in Headers */ = {isa = PBXBuildFile; fileRef = 83304C9A1A5F9A1C0066CDDA /* pt_blep.h */; }; + 832127D71A622EEC00979C39 /* resampler.h in Headers */ = {isa = PBXBuildFile; fileRef = 832127D51A622EEC00979C39 /* resampler.h */; }; + 832127D81A622EEC00979C39 /* resampler.c in Sources */ = {isa = PBXBuildFile; fileRef = 832127D61A622EEC00979C39 /* resampler.c 
*/; }; 83A0F4A61816CEAD00119DB4 /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = 83A0F4A41816CEAD00119DB4 /* InfoPlist.strings */; }; 83A0F4D51816CF9500119DB4 /* playptmod.c in Sources */ = {isa = PBXBuildFile; fileRef = 83A0F4D11816CF9500119DB4 /* playptmod.c */; }; 83A0F4D61816CF9500119DB4 /* playptmod.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A0F4D21816CF9500119DB4 /* playptmod.h */; settings = {ATTRIBUTES = (Public, ); }; }; /* End PBXBuildFile section */ /* Begin PBXFileReference section */ - 83304C991A5F9A1C0066CDDA /* pt_blep.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pt_blep.c; sourceTree = ""; }; - 83304C9A1A5F9A1C0066CDDA /* pt_blep.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pt_blep.h; sourceTree = ""; }; + 832127D51A622EEC00979C39 /* resampler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = resampler.h; sourceTree = ""; }; + 832127D61A622EEC00979C39 /* resampler.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = resampler.c; sourceTree = ""; }; 83A0F4981816CEAD00119DB4 /* playptmod.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = playptmod.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 83A0F4A31816CEAD00119DB4 /* playptmod-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "playptmod-Info.plist"; sourceTree = ""; }; 83A0F4A51816CEAD00119DB4 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = ""; }; @@ -70,8 +70,8 @@ 83A0F4A11816CEAD00119DB4 /* playptmod */ = { isa = PBXGroup; children = ( - 83304C991A5F9A1C0066CDDA /* pt_blep.c */, - 83304C9A1A5F9A1C0066CDDA /* pt_blep.h */, + 832127D51A622EEC00979C39 /* resampler.h */, + 832127D61A622EEC00979C39 /* resampler.c */, 
83A0F4D11816CF9500119DB4 /* playptmod.c */, 83A0F4D21816CF9500119DB4 /* playptmod.h */, 83A0F4A21816CEAD00119DB4 /* Supporting Files */, @@ -95,8 +95,8 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( + 832127D71A622EEC00979C39 /* resampler.h in Headers */, 83A0F4D61816CF9500119DB4 /* playptmod.h in Headers */, - 83304C9C1A5F9A1C0066CDDA /* pt_blep.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -163,8 +163,8 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 83304C9B1A5F9A1C0066CDDA /* pt_blep.c in Sources */, 83A0F4D51816CF9500119DB4 /* playptmod.c in Sources */, + 832127D81A622EEC00979C39 /* resampler.c in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/Frameworks/playptmod/playptmod/playptmod.c b/Frameworks/playptmod/playptmod/playptmod.c index 6501019e6..169e5c39d 100644 --- a/Frameworks/playptmod/playptmod/playptmod.c +++ b/Frameworks/playptmod/playptmod/playptmod.c @@ -29,7 +29,7 @@ #define _USE_MATH_DEFINES // visual studio #include "playptmod.h" -#include "pt_blep.h" +#include "resampler.h" #include #include // memcpy() @@ -185,8 +185,8 @@ typedef struct voice_data int panR; int step; int newStep; - float frac; float rate; + int interpolating; int mute; } Voice; @@ -241,8 +241,8 @@ typedef struct FilterC filterC; float *mixBufferL; float *mixBufferR; - BLEP blep[MAX_CHANNELS]; - BLEP blepVol[MAX_CHANNELS]; + void * blep[MAX_CHANNELS]; + void * blepVol[MAX_CHANNELS]; unsigned int orderPlayed[256]; MODULE *source; } player; @@ -437,6 +437,7 @@ static void mixerSwapChSource(player *p, int ch, const signed char *src, int len v->newLoopBegin = loopStart; v->newLoopEnd = loopStart + loopLength; v->newStep = step; + v->interpolating = 1; // if the mixer was already shut down because of a short non-loop sample, force swap if (v->data == NULL) @@ -446,7 +447,6 @@ static void mixerSwapChSource(player *p, int ch, const signed char *src, int len v->loopFlag = v->newLoopFlag; v->data = 
v->newData; v->length = v->newLength; - v->frac = 0.0f; v->step = v->newStep; // for safety, shut down voice if the sample position is overriding the length @@ -464,12 +464,12 @@ static void mixerSetChSource(player *p, int ch, const signed char *src, int leng v->swapSampleFlag = false; v->data = src; v->index = offset; - v->frac = 0.0f; v->length = length; v->loopFlag = loopLength > (2 * step); v->loopBegin = loopStart; v->loopEnd = loopStart + loopLength; v->step = step; + v->interpolating = 1; // Check external 9xx usage (Set Sample Offset) if (v->loopFlag) @@ -525,8 +525,8 @@ static void mixerCutChannels(player *p) memset(p->v, 0, sizeof (p->v)); for (i = 0; i < MAX_CHANNELS; ++i) { - memset(&p->blep[i], 0, sizeof(BLEP)); - memset(&p->blepVol[i], 0, sizeof(BLEP)); + resampler_clear(p->blep[i]); + resampler_clear(p->blepVol[i]); } memset(&p->filter, 0, sizeof (p->filter)); @@ -545,7 +545,7 @@ static void mixerCutChannels(player *p) static void mixerSetChRate(player *p, int ch, float rate) { - p->v[ch].rate = rate; + p->v[ch].rate = 1.0f / rate; } static void outputAudio(player *p, int *target, int numSamples) @@ -556,6 +556,7 @@ static void outputAudio(player *p, int *target, int numSamples) int step; int tempVolume; int delta; + int interpolating; unsigned int i; unsigned int j; float L; @@ -565,8 +566,8 @@ static void outputAudio(player *p, int *target, int numSamples) float downscale; Voice *v; - BLEP *bSmp; - BLEP *bVol; + void *bSmp; + void *bVol; memset(p->mixBufferL, 0, numSamples * sizeof (float)); memset(p->mixBufferR, 0, numSamples * sizeof (float)); @@ -576,27 +577,99 @@ static void outputAudio(player *p, int *target, int numSamples) j = 0; v = &p->v[i]; - bSmp = &p->blep[i]; - bVol = &p->blepVol[i]; + bSmp = p->blep[i]; + bVol = p->blepVol[i]; if (v->data && v->rate) { step = v->step; + interpolating = v->interpolating; + resampler_set_rate(bSmp, v->rate); + resampler_set_rate(bVol, v->rate); + for (j = 0; j < numSamples;) { - tempSample = (v->data ? 
(step == 2 ? (v->data[v->index] + v->data[v->index + 1] * 0x100) : v->data[v->index] * 0x100) : 0); tempVolume = (v->data && !v->mute ? v->vol : 0); - while (j < numSamples && (!v->data || v->frac >= 1.0f)) + while (interpolating && (resampler_get_free_count(bSmp) || + !resampler_get_sample_count(bSmp))) { - t_vol = 0.0f; - t_smp = 0.0f; + tempSample = (v->data ? (step == 2 ? (v->data[v->index] + v->data[v->index + 1] * 0x100) : v->data[v->index] * 0x100) : 0); + + resampler_write_sample_fixed(bSmp, tempSample, 1); + resampler_write_sample_fixed(bVol, tempVolume, 1); if (v->data) - v->frac -= 1.0f; - - t_vol += blepRun(bVol); - t_smp += blepRun(bSmp); + { + v->index += step; + + if (v->loopFlag) + { + if (v->index >= v->loopEnd) + { + if (v->swapSampleFlag) + { + v->swapSampleFlag = false; + + if (!v->newLoopFlag) + { + interpolating = 0; + break; + } + + v->loopBegin = v->newLoopBegin; + v->loopEnd = v->newLoopEnd; + v->loopFlag = v->newLoopFlag; + v->data = v->newData; + v->length = v->newLength; + v->step = v->newStep; + + v->index = v->loopBegin; + } + else + { + v->index = v->loopBegin; + } + } + } + else if (v->index >= v->length) + { + if (v->swapSampleFlag) + { + v->swapSampleFlag = false; + + if (!v->newLoopFlag) + { + interpolating = 0; + break; + } + + v->loopBegin = v->newLoopBegin; + v->loopEnd = v->newLoopEnd; + v->loopFlag = v->newLoopFlag; + v->data = v->newData; + v->length = v->newLength; + v->step = v->newStep; + + v->index = v->loopBegin; + } + else + { + interpolating = 0; + break; + } + } + } + } + + v->interpolating = interpolating; + + while (j < numSamples && resampler_get_sample_count(bSmp)) + { + t_vol = resampler_get_sample_float(bVol); + t_smp = resampler_get_sample_float(bSmp); + resampler_remove_sample(bVol, 0); + resampler_remove_sample(bSmp, 1); t_smp *= t_vol; i_smp = (signed int)t_smp; @@ -606,108 +679,12 @@ static void outputAudio(player *p, int *target, int numSamples) j++; } - - if (j >= numSamples) + + if (!interpolating && j 
< numSamples) + { + v->data = NULL; break; - - if (tempSample != bSmp->lastInput && v->frac >= 0.0f && v->frac < 1.0f) - { - delta = tempSample - bSmp->lastInput; - bSmp->lastInput = tempSample; - blepAdd(bSmp, v->frac, delta); } - - if (tempVolume != bVol->lastInput) - { - delta = tempVolume - bVol->lastInput; - bVol->lastInput = tempVolume; - blepAdd(bVol, 0, delta); - } - - if (v->data) - { - v->index += step; - v->frac += v->rate; - - if (v->loopFlag) - { - if (v->index >= v->loopEnd) - { - if (v->swapSampleFlag) - { - v->swapSampleFlag = false; - - if (!v->newLoopFlag) - { - v->data = NULL; - continue; - } - - v->loopBegin = v->newLoopBegin; - v->loopEnd = v->newLoopEnd; - v->loopFlag = v->newLoopFlag; - v->data = v->newData; - v->length = v->newLength; - v->frac = 0.0f; - v->step = v->newStep; - - while (v->index >= v->loopEnd) - v->index = v->loopBegin + (v->index - v->loopEnd); - } - else - { - while (v->index >= v->loopEnd) - v->index = v->loopBegin + (v->index - v->loopEnd); - } - } - } - else if (v->index >= v->length) - { - if (v->swapSampleFlag) - { - v->swapSampleFlag = false; - - if (!v->newLoopFlag) - { - v->data = NULL; - continue; - } - - v->loopBegin = v->newLoopBegin; - v->loopEnd = v->newLoopEnd; - v->loopFlag = v->newLoopFlag; - v->data = v->newData; - v->length = v->newLength; - v->frac = 0.0f; - v->step = v->newStep; - - while (v->index >= v->loopEnd) - v->index = v->loopBegin + (v->index - v->loopEnd); - } - else - { - v->data = NULL; - } - } - } - } - } - - if ((j < numSamples) && (v->data == NULL)) - { - for (; j < numSamples; ++j) - { - tempVolume = 0.0f; - tempSample = 0.0f; - - tempVolume += blepRun(bVol); - tempSample += blepRun(bSmp); - - tempSample *= tempVolume; - i_smp = (signed int)tempSample; - - p->mixBufferL[j] += i_smp * v->panL; - p->mixBufferR[j] += i_smp * v->panR; } } } @@ -2787,6 +2764,8 @@ void *playptmod_Create(int samplingFrequency) player *p = (player *) calloc(1, sizeof (player)); int i, j; + + resampler_init(); 
p->tempoTimerVal = (samplingFrequency * 125) / 50; @@ -2820,8 +2799,20 @@ void *playptmod_Create(int samplingFrequency) p->useLEDFilter = false; + for (i = 0; i < MAX_CHANNELS; ++i) + { + p->blep[i] = resampler_create(); + resampler_set_quality(p->blep[i], RESAMPLER_QUALITY_BLEP); + } + + for (i = 0; i < MAX_CHANNELS; ++i) + { + p->blepVol[i] = resampler_create(); + resampler_set_quality(p->blepVol[i], RESAMPLER_QUALITY_BLEP); + } + mixerCutChannels(p); - + return p; } @@ -2953,6 +2944,12 @@ void playptmod_Free(void *_p) p->extendedFrequencyTable = NULL; } + for (i = 0; i < MAX_CHANNELS; ++i) + { + resampler_delete(p->blep[i]); + resampler_delete(p->blepVol[i]); + } + free(p); } diff --git a/Frameworks/playptmod/playptmod/pt_blep.c b/Frameworks/playptmod/playptmod/pt_blep.c deleted file mode 100644 index 6cbab3e80..000000000 --- a/Frameworks/playptmod/playptmod/pt_blep.c +++ /dev/null @@ -1,93 +0,0 @@ -/* -** This file is part of the ProTracker 2.3D port/clone -** project by Olav "8bitbubsy" Sorensen. -** -** It contains unstructured and unclean code, but I care -** more about how the program works than how the source -** code looks. Although, I do respect coders that can -** master the art of writing clean and structured code. -** I know I can't. -** -** All of the files are considered 'public domain', -** do whatever you want with it. 
-** -*/ - -#include -#include "pt_blep.h" - -#define _LERP(I, F) ((I[0]) + ((I[1]) - (I[0])) * (F)) - -static const uint32_t blepData[48] = -{ - 0x3F7FE1F1, 0x3F7FD548, 0x3F7FD6A3, 0x3F7FD4E3, - 0x3F7FAD85, 0x3F7F2152, 0x3F7DBFAE, 0x3F7ACCDF, - 0x3F752F1E, 0x3F6B7384, 0x3F5BFBCB, 0x3F455CF2, - 0x3F26E524, 0x3F0128C4, 0x3EACC7DC, 0x3E29E86B, - 0x3C1C1D29, 0xBDE4BBE6, 0xBE3AAE04, 0xBE48DEDD, - 0xBE22AD7E, 0xBDB2309A, 0xBB82B620, 0x3D881411, - 0x3DDADBF3, 0x3DE2C81D, 0x3DAAA01F, 0x3D1E769A, - 0xBBC116D7, 0xBD1402E8, 0xBD38A069, 0xBD0C53BB, - 0xBC3FFB8C, 0x3C465FD2, 0x3CEA5764, 0x3D0A51D6, - 0x3CEAE2D5, 0x3C92AC5A, 0x3BE4CBF7, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000 -}; - -void blepAdd(BLEP *b, float offset, float amplitude) -{ - int8_t n; - - uint32_t i; - - const float *src; - float f; - float a; - - float k[NS]; - - n = NS; - i = (uint32_t)(offset * SP); - src = (const float *)(blepData) + i + OS; - f = (offset * SP) - i; - i = b->index; - a = 0.0f; - - while (n--) - { - a += k[n] = _LERP(src, f); - src += SP; - } - - n = NS; - a = 1.0f / a; - - while (n--) - { - b->buffer[i] += (amplitude * k[n]) * a; - - i++; - i &= RNS; - } - - b->samplesLeft = NS; -} - -float blepRun(BLEP *b) -{ - float output; - - output = b->buffer[b->index]; - b->buffer[b->index] = 0.0f; - - b->index++; - b->index &= RNS; - - b->samplesLeft--; - - output += b->lastOutput; - b->lastOutput = output; - - return (output); -} - diff --git a/Frameworks/playptmod/playptmod/pt_blep.h b/Frameworks/playptmod/playptmod/pt_blep.h deleted file mode 100644 index 29538c617..000000000 --- a/Frameworks/playptmod/playptmod/pt_blep.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -** This file is part of the ProTracker 2.3D port/clone -** project by Olav "8bitbubsy" Sorensen. -** -** It contains unstructured and unclean code, but I care -** more about how the program works than how the source -** code looks. 
Although, I do respect coders that can -** master the art of writing clean and structured code. -** I know I can't. -** -** All of the files are considered 'public domain', -** do whatever you want with it. -** -*/ - -#ifndef __PT_BLEP_H -#define __PT_BLEP_H - -#include - -// thanks to aciddose/ad/adejr for the blep/cutoff/filter stuff! - -// information on blep variables -// -// ZC = zero crossings, the number of ripples in the impulse -// OS = oversampling, how many samples per zero crossing are taken -// SP = step size per output sample, used to lower the cutoff (play the impulse slower) -// NS = number of samples of impulse to insert -// RNS = the lowest power of two greater than NS, minus one (used to wrap output buffer) -// -// ZC and OS are here only for reference, they depend upon the data in the table and can't be changed. -// SP, the step size can be any number lower or equal to OS, as long as the result NS remains an integer. -// for example, if ZC=8,OS=5, you can set SP=1, the result is NS=40, and RNS must then be 63. -// the result of that is the filter cutoff is set at nyquist * (SP/OS), in this case nyquist/5. 
- -#define ZC 8 -#define OS 5 -#define SP 5 -#define NS (ZC * OS / SP) -#define RNS 7 // RNS = (2^ > NS) - 1 - -typedef struct blep_data -{ - int32_t index; - int32_t samplesLeft; - float buffer[RNS + 1]; - float lastInput; - float lastOutput; -} BLEP; - -void blepAdd(BLEP *b, float offset, float amplitude); -float blepRun(BLEP *b); - -#endif - diff --git a/Frameworks/playptmod/playptmod/resampler.c b/Frameworks/playptmod/playptmod/resampler.c new file mode 100644 index 000000000..0366f38f0 --- /dev/null +++ b/Frameworks/playptmod/playptmod/resampler.c @@ -0,0 +1,1488 @@ +#include <stdlib.h> +#include <string.h> +#define _USE_MATH_DEFINES +#include <math.h> +#if (defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__amd64__)) +#include <xmmintrin.h> +#define RESAMPLER_SSE +#endif +#ifdef __APPLE__ +#include <TargetConditionals.h> +#if TARGET_CPU_ARM +#include <arm_neon.h> +#define RESAMPLER_NEON +#endif +#endif + +#ifdef _MSC_VER +#define ALIGNED _declspec(align(16)) +#else +#define ALIGNED __attribute__((aligned(16))) +#endif + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#include "resampler.h" + +enum { RESAMPLER_SHIFT = 10 }; +enum { RESAMPLER_SHIFT_EXTRA = 8 }; +enum { RESAMPLER_RESOLUTION = 1 << RESAMPLER_SHIFT }; +enum { RESAMPLER_RESOLUTION_EXTRA = 1 << (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA) }; +enum { SINC_WIDTH = 16 }; +enum { SINC_SAMPLES = RESAMPLER_RESOLUTION * SINC_WIDTH }; +enum { CUBIC_SAMPLES = RESAMPLER_RESOLUTION * 4 }; + +static const float RESAMPLER_BLEP_CUTOFF = 0.90f; +static const float RESAMPLER_BLAM_CUTOFF = 0.93f; +static const float RESAMPLER_SINC_CUTOFF = 0.999f; + +ALIGNED static float cubic_lut[CUBIC_SAMPLES]; + +static float sinc_lut[SINC_SAMPLES + 1]; +static float window_lut[SINC_SAMPLES + 1]; + +enum { resampler_buffer_size = SINC_WIDTH * 4 }; + +static int fEqual(const float b, const float a) +{ + return fabs(a - b) < 1.0e-6; +} + +static float sinc(float x) +{ + return fEqual(x, 0.0) ? 
1.0 : sin(x * M_PI) / (x * M_PI); +} + +#ifdef RESAMPLER_SSE +#ifdef _MSC_VER +#include <intrin.h> +#elif defined(__clang__) || defined(__GNUC__) +static inline void +__cpuid(int *data, int selector) +{ +#if defined(__PIC__) && defined(__i386__) + asm("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi" + : "=a" (data[0]), + "=S" (data[1]), + "=c" (data[2]), + "=d" (data[3]) + : "0" (selector)); +#elif defined(__PIC__) && defined(__amd64__) + asm("xchg{q} {%%}rbx, %q1; cpuid; xchg{q} {%%}rbx, %q1" + : "=a" (data[0]), + "=&r" (data[1]), + "=c" (data[2]), + "=d" (data[3]) + : "0" (selector)); +#else + asm("cpuid" + : "=a" (data[0]), + "=b" (data[1]), + "=c" (data[2]), + "=d" (data[3]) + : "0" (selector)); +#endif +} +#else +#define __cpuid(a,b) memset((a), 0, sizeof(int) * 4) +#endif + +static int query_cpu_feature_sse() { + int buffer[4]; + __cpuid(buffer,1); + if ((buffer[3]&(1<<25)) == 0) return 0; + return 1; +} + +static int resampler_has_sse = 0; +#endif + +void resampler_init(void) +{ + unsigned i; + double dx = (float)(SINC_WIDTH) / SINC_SAMPLES, x = 0.0; + for (i = 0; i < SINC_SAMPLES + 1; ++i, x += dx) + { + float y = x / SINC_WIDTH; +#if 0 + // Blackman + float window = 0.42659 - 0.49656 * cos(M_PI + M_PI * y) + 0.076849 * cos(2.0 * M_PI * y); +#elif 1 + // Nuttall 3 term + float window = 0.40897 + 0.5 * cos(M_PI * y) + 0.09103 * cos(2.0 * M_PI * y); +#elif 0 + // C.R.Helmrich's 2 term window + float window = 0.79445 * cos(0.5 * M_PI * y) + 0.20555 * cos(1.5 * M_PI * y); +#elif 0 + // Lanczos + float window = sinc(y); +#endif + sinc_lut[i] = fabs(x) < SINC_WIDTH ? 
sinc(x) : 0.0; + window_lut[i] = window; + } + dx = 1.0 / (float)(RESAMPLER_RESOLUTION); + x = 0.0; + for (i = 0; i < RESAMPLER_RESOLUTION; ++i, x += dx) + { + cubic_lut[i*4] = (float)(-0.5 * x * x * x + x * x - 0.5 * x); + cubic_lut[i*4+1] = (float)( 1.5 * x * x * x - 2.5 * x * x + 1.0); + cubic_lut[i*4+2] = (float)(-1.5 * x * x * x + 2.0 * x * x + 0.5 * x); + cubic_lut[i*4+3] = (float)( 0.5 * x * x * x - 0.5 * x * x); + } +#ifdef RESAMPLER_SSE + resampler_has_sse = query_cpu_feature_sse(); +#endif +} + +typedef struct resampler +{ + int write_pos, write_filled; + int read_pos, read_filled; + float phase; + float phase_inc; + float inv_phase; + float inv_phase_inc; + unsigned char quality; + signed char delay_added; + signed char delay_removed; + float last_amp; + float accumulator; + float buffer_in[resampler_buffer_size * 2]; + float buffer_out[resampler_buffer_size + SINC_WIDTH * 2 - 1]; +} resampler; + +void * resampler_create(void) +{ + resampler * r = ( resampler * ) malloc( sizeof(resampler) ); + if ( !r ) return 0; + + r->write_pos = SINC_WIDTH - 1; + r->write_filled = 0; + r->read_pos = 0; + r->read_filled = 0; + r->phase = 0; + r->phase_inc = 0; + r->inv_phase = 0; + r->inv_phase_inc = 0; + r->quality = RESAMPLER_QUALITY_MAX; + r->delay_added = -1; + r->delay_removed = -1; + r->last_amp = 0; + r->accumulator = 0; + memset( r->buffer_in, 0, sizeof(r->buffer_in) ); + memset( r->buffer_out, 0, sizeof(r->buffer_out) ); + + return r; +} + +void resampler_delete(void * _r) +{ + free( _r ); +} + +void * resampler_dup(const void * _r) +{ + void * r_out = malloc( sizeof(resampler) ); + if ( !r_out ) return 0; + + resampler_dup_inplace(r_out, _r); + + return r_out; +} + +void resampler_dup_inplace(void *_d, const void *_s) +{ + const resampler * r_in = ( const resampler * ) _s; + resampler * r_out = ( resampler * ) _d; + + r_out->write_pos = r_in->write_pos; + r_out->write_filled = r_in->write_filled; + r_out->read_pos = r_in->read_pos; + r_out->read_filled = 
r_in->read_filled; + r_out->phase = r_in->phase; + r_out->phase_inc = r_in->phase_inc; + r_out->inv_phase = r_in->inv_phase; + r_out->inv_phase_inc = r_in->inv_phase_inc; + r_out->quality = r_in->quality; + r_out->delay_added = r_in->delay_added; + r_out->delay_removed = r_in->delay_removed; + r_out->last_amp = r_in->last_amp; + r_out->accumulator = r_in->accumulator; + memcpy( r_out->buffer_in, r_in->buffer_in, sizeof(r_in->buffer_in) ); + memcpy( r_out->buffer_out, r_in->buffer_out, sizeof(r_in->buffer_out) ); +} + +void resampler_set_quality(void *_r, int quality) +{ + resampler * r = ( resampler * ) _r; + if (quality < RESAMPLER_QUALITY_MIN) + quality = RESAMPLER_QUALITY_MIN; + else if (quality > RESAMPLER_QUALITY_MAX) + quality = RESAMPLER_QUALITY_MAX; + if ( r->quality != quality ) + { + if ( quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLEP || + quality == RESAMPLER_QUALITY_BLAM || r->quality == RESAMPLER_QUALITY_BLAM ) + { + r->read_pos = 0; + r->read_filled = 0; + r->last_amp = 0; + r->accumulator = 0; + memset( r->buffer_out, 0, sizeof(r->buffer_out) ); + } + r->delay_added = -1; + r->delay_removed = -1; + } + r->quality = (unsigned char)quality; +} + +int resampler_get_free_count(void *_r) +{ + resampler * r = ( resampler * ) _r; + return resampler_buffer_size - r->write_filled; +} + +static int resampler_min_filled(resampler *r) +{ + switch (r->quality) + { + default: + case RESAMPLER_QUALITY_ZOH: + case RESAMPLER_QUALITY_BLEP: + return 1; + + case RESAMPLER_QUALITY_LINEAR: + case RESAMPLER_QUALITY_BLAM: + return 2; + + case RESAMPLER_QUALITY_CUBIC: + return 4; + + case RESAMPLER_QUALITY_SINC: + return SINC_WIDTH * 2; + } +} + +static int resampler_input_delay(resampler *r) +{ + switch (r->quality) + { + default: + case RESAMPLER_QUALITY_ZOH: + case RESAMPLER_QUALITY_BLEP: + case RESAMPLER_QUALITY_LINEAR: + case RESAMPLER_QUALITY_BLAM: + return 0; + + case RESAMPLER_QUALITY_CUBIC: + return 1; + + case RESAMPLER_QUALITY_SINC: + 
return SINC_WIDTH - 1; + } +} + +static int resampler_output_delay(resampler *r) +{ + switch (r->quality) + { + default: + case RESAMPLER_QUALITY_ZOH: + case RESAMPLER_QUALITY_LINEAR: + case RESAMPLER_QUALITY_CUBIC: + case RESAMPLER_QUALITY_SINC: + return 0; + + case RESAMPLER_QUALITY_BLEP: + case RESAMPLER_QUALITY_BLAM: + return SINC_WIDTH - 1; + } +} + +int resampler_ready(void *_r) +{ + resampler * r = ( resampler * ) _r; + return r->write_filled > resampler_min_filled(r); +} + +void resampler_clear(void *_r) +{ + resampler * r = ( resampler * ) _r; + r->write_pos = SINC_WIDTH - 1; + r->write_filled = 0; + r->read_pos = 0; + r->read_filled = 0; + r->phase = 0; + r->delay_added = -1; + r->delay_removed = -1; + memset(r->buffer_in, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0])); + memset(r->buffer_in + resampler_buffer_size, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0])); + if (r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM) + { + r->inv_phase = 0; + memset(r->buffer_out, 0, sizeof(r->buffer_out)); + } +} + +void resampler_set_rate(void *_r, double new_factor) +{ + resampler * r = ( resampler * ) _r; + r->phase_inc = new_factor; + new_factor = 1.0 / new_factor; + r->inv_phase_inc = new_factor; +} + +void resampler_write_sample(void *_r, short s) +{ + resampler * r = ( resampler * ) _r; + + if ( r->delay_added < 0 ) + { + r->delay_added = 0; + r->write_filled = resampler_input_delay( r ); + } + + if ( r->write_filled < resampler_buffer_size ) + { + float s32 = s; + s32 *= 256.0; + + r->buffer_in[ r->write_pos ] = s32; + r->buffer_in[ r->write_pos + resampler_buffer_size ] = s32; + + ++r->write_filled; + + r->write_pos = ( r->write_pos + 1 ) % resampler_buffer_size; + } +} + +void resampler_write_sample_fixed(void *_r, int s, unsigned char depth) +{ + resampler * r = ( resampler * ) _r; + + if ( r->delay_added < 0 ) + { + r->delay_added = 0; + r->write_filled = resampler_input_delay( r ); + } + + if ( r->write_filled < 
resampler_buffer_size ) + { + float s32 = s; + s32 /= (double)(1 << (depth - 1)); + + r->buffer_in[ r->write_pos ] = s32; + r->buffer_in[ r->write_pos + resampler_buffer_size ] = s32; + + ++r->write_filled; + + r->write_pos = ( r->write_pos + 1 ) % resampler_buffer_size; + } +} + +static int resampler_run_zoh(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 1; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + float sample; + + if ( out >= out_end ) + break; + + sample = *in; + *out++ = sample; + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} + +#ifndef RESAMPLER_NEON +static int resampler_run_blep(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 1; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = *in++ - last_amp; + + if (sample) + { + float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f; + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos 
= i * step; + int window_pos = i * window_step; + kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + for (i = 0; i < SINC_WIDTH * 2; ++i) /* sample holds the normalized step amplitude; it must not be reset before accumulating */ + out[i] += sample * kernel[i]; + } + + inv_phase += inv_phase_inc; + + out += (int)inv_phase; + + inv_phase = fmod(inv_phase, 1.0f); + } + while ( in < in_end ); + + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_SSE +static int resampler_run_blep_sse(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 1; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = *in++ - last_amp; + + if (sample) + { + float kernel_sum = 0.0f; + __m128 kernel[SINC_WIDTH / 2]; + __m128 temp1, temp2; + __m128 samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = _mm_set1_ps( sample ); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = _mm_load_ps( (const 
float *)( kernel + i ) ); + temp1 = _mm_mul_ps( temp1, samplex ); + temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); + temp1 = _mm_add_ps( temp1, temp2 ); + _mm_storeu_ps( (float *) out + i * 4, temp1 ); + } + } + + inv_phase += inv_phase_inc; + + out += (int)inv_phase; + + inv_phase = fmod(inv_phase, 1.0f); + } + while ( in < in_end ); + + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_blep(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 1; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = *in++ - last_amp; + + if (sample) + { + float kernel_sum = 0.0f; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = vdupq_n_f32(sample); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( kernel + i ) ); + temp2 = vld1q_f32( (const float32_t *) out + i * 4 ); 
+ temp1 = vmlaq_f32( temp2, temp1, samplex ); + vst1q_f32( (float32_t *) out + i * 4, temp1 ); + } + } + + inv_phase += inv_phase_inc; + + out += (int)inv_phase; + + inv_phase = fmod(inv_phase, 1.0f); + } + while ( in < in_end ); + + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +static int resampler_run_linear(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + float sample; + + if ( out >= out_end ) + break; + + sample = in[0] + (in[1] - in[0]) * phase; + *out++ = sample; + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} + +#ifndef RESAMPLER_NEON +static int resampler_run_blam(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + sample += (in[1] - in[0]) * phase; + sample -= 
last_amp; + + if (sample) + { + float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f; + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + for (i = 0; i < SINC_WIDTH * 2; ++i) /* sample holds the normalized step amplitude; it must not be reset before accumulating */ + out[i] += sample * kernel[i]; + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + in += (int)phase; + phase = fmod(phase, 1.0f); + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_SSE +static int resampler_run_blam_sse(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + { + sample += (in[1] - in[0]) * phase; + } + sample -= last_amp; + + if (sample) + { + float kernel_sum = 0.0f; + __m128 kernel[SINC_WIDTH / 2]; + __m128 temp1, temp2; + __m128 
samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = _mm_set1_ps( sample ); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = _mm_load_ps( (const float *)( kernel + i ) ); + temp1 = _mm_mul_ps( temp1, samplex ); + temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); + temp1 = _mm_add_ps( temp1, temp2 ); + _mm_storeu_ps( (float *) out + i * 4, temp1 ); + } + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + + if (phase >= 1.0f) + { + ++in; + phase = fmod(phase, 1.0f); + } + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_blam(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + 
sample = in[0]; + if (phase_inc < 1.0f) + sample += (in[1] - in[0]) * phase; /* linear interpolation between in[0] and in[1] at the current input phase */ + sample -= last_amp; + + if (sample) + { + float kernel_sum = 0.0; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = vdupq_n_f32(sample); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( kernel + i ) ); + temp2 = vld1q_f32( (const float32_t *) out + i * 4 ); + temp1 = vmlaq_f32( temp2, temp1, samplex ); + vst1q_f32( (float32_t *) out + i * 4, temp1 ); + } + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + + if (phase >= 1.0f) + { + ++in; + phase = fmod(phase, 1.0f); + } + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifndef RESAMPLER_NEON +static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 4; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + float * kernel; + int i; + float sample; + + if ( out >= out_end ) + break; + + kernel = 
cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4; + + for (sample = 0, i = 0; i < 4; ++i) + sample += in[i] * kernel[i]; + *out++ = sample; + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_SSE +static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 4; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + __m128 temp1, temp2; + __m128 samplex = _mm_setzero_ps(); + + if ( out >= out_end ) + break; + + temp1 = _mm_loadu_ps( (const float *)( in ) ); + temp2 = _mm_load_ps( (const float *)( cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4 ) ); + temp1 = _mm_mul_ps( temp1, temp2 ); + samplex = _mm_add_ps( samplex, temp1 ); + temp1 = _mm_movehl_ps( temp1, samplex ); + samplex = _mm_add_ps( samplex, temp1 ); + temp1 = samplex; + temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) ); + samplex = _mm_add_ps( samplex, temp1 ); + _mm_store_ss( out, samplex ); + ++out; + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 4; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const 
in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + float32x4_t temp1, temp2; + float32x2_t half; + + if ( out >= out_end ) + break; + + temp1 = vld1q_f32( (const float32_t *)( in ) ); + temp2 = vld1q_f32( (const float32_t *)( cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4 ) ); + temp1 = vmulq_f32( temp1, temp2 ); + half = vadd_f32(vget_high_f32(temp1), vget_low_f32(temp1)); + *out++ = vget_lane_f32(vpadd_f32(half, half), 0); + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifndef RESAMPLER_NEON +static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) /* Scalar sinc resampler, built when RESAMPLER_NEON is not defined. Each output sample is a dot product of SINC_WIDTH * 2 input samples with a kernel built from sinc_lut and window_lut. Writes through *out_ until out_end is reached or the input runs out, stores the advanced pointer back in *out_, and returns the number of input samples consumed (0 if r->write_filled <= SINC_WIDTH * 2). */ +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; /* first of the write_filled pending input samples; NOTE(review): the + resampler_buffer_size offset presumably addresses a mirrored second copy of the ring - confirm */ + int used = 0; + in_size -= SINC_WIDTH * 2; /* each output reads in[0 .. SINC_WIDTH * 2 - 1], so that many samples are held back */ + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + int step = phase_inc > 1.0f ?
(int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); /* sinc_lut stride per tap; divided by phase_inc when phase_inc > 1.0f */ + int window_step = RESAMPLER_RESOLUTION; /* window_lut stride per tap is not scaled */ + + do + { + float kernel[SINC_WIDTH * 2], kernel_sum = 0.0; + int i = SINC_WIDTH; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); /* fractional position expressed as a table index */ + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + float sample; + + if ( out >= out_end ) + break; + + for (; i >= -SINC_WIDTH + 1; --i) /* fills kernel[SINC_WIDTH * 2 - 1] down to kernel[0] */ + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + for (sample = 0, i = 0; i < SINC_WIDTH * 2; ++i) + sample += in[i] * kernel[i]; + *out++ = (float)(sample / kernel_sum); /* divide by the sum of the kernel taps */ + + phase += phase_inc; + + in += (int)phase; /* consume the whole-sample part of the step */ + + phase = fmod(phase, 1.0f); /* keep the fractional part; NOTE(review): fmod() is the double version, fmodf() would avoid the float/double round trip - confirm */ + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_SSE +static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) /* SSE build of the sinc resampler: same parameters and return value as resampler_run_sinc, with the dot product done four floats at a time. */ +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= SINC_WIDTH * 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + int step = phase_inc > 1.0f ?
(int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); + int window_step = RESAMPLER_RESOLUTION; + + do + { + // accumulate in extended precision + float kernel_sum = 0.0; /* NOTE(review): plain float, so the "extended precision" remark above looks stale - confirm */ + __m128 kernel[SINC_WIDTH / 2]; /* SINC_WIDTH / 2 vectors x 4 lanes = SINC_WIDTH * 2 taps */ + __m128 temp1, temp2; + __m128 samplex = _mm_setzero_ps(); + float *kernelf = (float*)(&kernel); /* scalar view of kernel[] for the fill loop */ + int i = SINC_WIDTH; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + + if ( out >= out_end ) + break; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = _mm_loadu_ps( (const float *)( in + i * 4 ) ); /* unaligned load: in can point at any sample */ + temp2 = _mm_load_ps( (const float *)( kernel + i ) ); + temp1 = _mm_mul_ps( temp1, temp2 ); + samplex = _mm_add_ps( samplex, temp1 ); + } + kernel_sum = 1.0 / kernel_sum; /* reciprocal, applied to the summed result below */ + temp1 = _mm_movehl_ps( temp1, samplex ); /* horizontal add, step 1: bring lanes 2,3 down onto lanes 0,1 */ + samplex = _mm_add_ps( samplex, temp1 ); + temp1 = samplex; + temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) ); /* step 2: lane 1 into lane 0 */ + samplex = _mm_add_ps( samplex, temp1 ); + temp1 = _mm_set_ss( kernel_sum ); + samplex = _mm_mul_ps( samplex, temp1 ); + _mm_store_ss( out, samplex ); /* only lane 0 is written out */ + ++out; + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) /* NEON build of resampler_run_sinc, compiled instead of the scalar definition when RESAMPLER_NEON is defined; same parameters and return value. */ +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= SINC_WIDTH * 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in +
in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + int step = phase_inc > 1.0f ? (int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); + int window_step = RESAMPLER_RESOLUTION; + + do + { + // accumulate in extended precision + float kernel_sum = 0.0; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex = {0}; + float32x2_t half; + float *kernelf = (float*)(&kernel); + int i = SINC_WIDTH; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + + if ( out >= out_end ) + break; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( in + i * 4 ) ); + temp2 = vld1q_f32( (const float32_t *)( kernel + i ) ); + samplex = vmlaq_f32( samplex, temp1, temp2 ); /* multiply-accumulate four taps per iteration */ + } + kernel_sum = 1.0 / kernel_sum; + samplex = vmulq_f32(samplex, vmovq_n_f32(kernel_sum)); /* scale every lane by 1 / kernel_sum before the horizontal add */ + half = vadd_f32(vget_high_f32(samplex), vget_low_f32(samplex)); + *out++ = vget_lane_f32(vpadd_f32(half, half), 0); + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +static void resampler_fill(resampler * r) /* Repeatedly runs the resampler selected by r->quality, appending to the ring buffer r->buffer_out, for as long as more than resampler_min_filled(r) input samples are pending and the output ring is not full. The BLEP and BLAM cases return early when a pass makes no progress. */ +{ + int min_filled = resampler_min_filled(r); + int quality = r->quality; + while ( r->write_filled > min_filled && + r->read_filled < resampler_buffer_size ) + { + int write_pos = ( r->read_pos + r->read_filled ) % resampler_buffer_size; /* first slot after the unread output */ + int write_size = resampler_buffer_size - write_pos; /* contiguous room up to the end of the ring, clamped to the free space just below */ + float * out = r->buffer_out + write_pos; + if ( write_size > ( resampler_buffer_size - r->read_filled ) ) +
write_size = resampler_buffer_size - r->read_filled; + switch (quality) + { + case RESAMPLER_QUALITY_ZOH: + resampler_run_zoh( r, &out, out + write_size ); + break; + + case RESAMPLER_QUALITY_BLEP: + { + int used; + int write_extra = 0; /* extra room past the end of the ring: at most SINC_WIDTH * 2 - 1 samples, and only when the write position is at or after read_pos */ + if ( write_pos >= r->read_pos ) + write_extra = r->read_pos; + if ( write_extra > SINC_WIDTH * 2 - 1 ) + write_extra = SINC_WIDTH * 2 - 1; + memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) ); /* mirror the head of the ring into the overrun area before running */ +#ifdef RESAMPLER_SSE + if ( resampler_has_sse ) + used = resampler_run_blep_sse( r, &out, out + write_size + write_extra ); + else +#endif + used = resampler_run_blep( r, &out, out + write_size + write_extra ); + memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) ); /* copy the overrun area back onto the head of the ring */ + if (!used) + return; /* no input was consumed by this pass */ + break; + } + + case RESAMPLER_QUALITY_LINEAR: + resampler_run_linear( r, &out, out + write_size ); + break; + + case RESAMPLER_QUALITY_BLAM: + { + float * out_ = out; /* remembered to detect a pass that produced nothing */ + int write_extra = 0; /* same overrun handling as the BLEP case */ + if ( write_pos >= r->read_pos ) + write_extra = r->read_pos; + if ( write_extra > SINC_WIDTH * 2 - 1 ) + write_extra = SINC_WIDTH * 2 - 1; + memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) ); +#ifdef RESAMPLER_SSE + if ( resampler_has_sse ) + resampler_run_blam_sse( r, &out, out + write_size + write_extra ); + else +#endif + resampler_run_blam( r, &out, out + write_size + write_extra ); + memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) ); + if ( out == out_ ) + return; /* no output was produced by this pass */ + break; + } + + case RESAMPLER_QUALITY_CUBIC: +#ifdef RESAMPLER_SSE + if ( resampler_has_sse ) + resampler_run_cubic_sse( r, &out, out + write_size ); + else +#endif + resampler_run_cubic( r, &out, out + write_size ); + break; + + case RESAMPLER_QUALITY_SINC: +#ifdef RESAMPLER_SSE + if ( resampler_has_sse ) + resampler_run_sinc_sse( r, &out, out + write_size ); + else +#endif +
resampler_run_sinc( r, &out, out + write_size ); + break; + } /* NOTE(review): no default case - with a quality value outside the enum nothing is consumed or produced and the loop condition never changes; presumably resampler_set_quality() clamps - confirm */ + r->read_filled += out - r->buffer_out - write_pos; /* samples appended by this pass */ + } +} + +static void resampler_fill_and_remove_delay(resampler * r) /* Fills the output ring; if r->delay_removed is negative, resets it to 0 and discards resampler_output_delay(r) samples with decay enabled. */ +{ + resampler_fill( r ); + if ( r->delay_removed < 0 ) + { + int delay = resampler_output_delay( r ); + r->delay_removed = 0; + while ( delay-- ) + resampler_remove_sample( r, 1 ); + } +} + +int resampler_get_sample_count(void *_r) /* Returns the number of buffered output samples. When the ring is empty it refills first; for BLEP/BLAM only if r->inv_phase_inc is non-zero. */ +{ + resampler * r = ( resampler * ) _r; + if ( r->read_filled < 1 && ((r->quality != RESAMPLER_QUALITY_BLEP && r->quality != RESAMPLER_QUALITY_BLAM) || r->inv_phase_inc)) + resampler_fill_and_remove_delay( r ); + return r->read_filled; +} + +int resampler_get_sample(void *_r) /* Returns the sample at the read position converted to int, without consuming it; 0 if none is available. BLEP/BLAM add r->accumulator before the conversion. A refill is attempted only when r->phase_inc is non-zero. */ +{ + resampler * r = ( resampler * ) _r; + if ( r->read_filled < 1 && r->phase_inc) + resampler_fill_and_remove_delay( r ); + if ( r->read_filled < 1 ) + return 0; + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) + return (int)(r->buffer_out[ r->read_pos ] + r->accumulator); + else + return (int)r->buffer_out[ r->read_pos ]; +} + +float resampler_get_sample_float(void *_r) /* Float counterpart of resampler_get_sample: same refill and accumulator handling, no integer conversion. */ +{ + resampler * r = ( resampler * ) _r; + if ( r->read_filled < 1 && r->phase_inc) + resampler_fill_and_remove_delay( r ); + if ( r->read_filled < 1 ) + return 0; + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) + return r->buffer_out[ r->read_pos ] + r->accumulator; + else + return r->buffer_out[ r->read_pos ]; +} + +void resampler_remove_sample(void *_r, int decay) /* Consumes one output sample if any is buffered. For BLEP/BLAM the sample is first added to r->accumulator and its slot zeroed; a non-zero decay then reduces the accumulator by 1/8192 of its value and snaps magnitudes below 1e-20 to 0. */ +{ + resampler * r = ( resampler * ) _r; + if ( r->read_filled > 0 ) + { + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) + { + r->accumulator += r->buffer_out[ r->read_pos ]; + r->buffer_out[ r->read_pos ] = 0; /* slot must be clear before the ring position is reused */ + if (decay) + { + r->accumulator -= r->accumulator * (1.0f / 8192.0f); + if (fabs(r->accumulator) < 1e-20f) /* NOTE(review): fabs() on a float operand; fabsf() would match the type - confirm */ + r->accumulator = 0; + } + } + --r->read_filled; + r->read_pos = ( r->read_pos + 1 ) % resampler_buffer_size; + } +} diff
--git a/Frameworks/playptmod/playptmod/resampler.h b/Frameworks/playptmod/playptmod/resampler.h new file mode 100644 index 000000000..0050ebf1a --- /dev/null +++ b/Frameworks/playptmod/playptmod/resampler.h @@ -0,0 +1,58 @@ +#ifndef _RESAMPLER_H_ +#define _RESAMPLER_H_ + +// Ugglay +#ifdef RESAMPLER_DECORATE /* when defined, each public name below is renamed to <RESAMPLER_DECORATE>_resampler_...; presumably lets several copies of the resampler be linked together - confirm */ +#define PASTE(a,b) a ## b +#define EVALUATE(a,b) PASTE(a,b) /* two-level expansion so RESAMPLER_DECORATE is expanded before ## pastes it */ +#define resampler_init EVALUATE(RESAMPLER_DECORATE,_resampler_init) +#define resampler_create EVALUATE(RESAMPLER_DECORATE,_resampler_create) +#define resampler_delete EVALUATE(RESAMPLER_DECORATE,_resampler_delete) +#define resampler_dup EVALUATE(RESAMPLER_DECORATE,_resampler_dup) +#define resampler_dup_inplace EVALUATE(RESAMPLER_DECORATE,_resampler_dup_inplace) +#define resampler_set_quality EVALUATE(RESAMPLER_DECORATE,_resampler_set_quality) +#define resampler_get_free_count EVALUATE(RESAMPLER_DECORATE,_resampler_get_free_count) +#define resampler_write_sample EVALUATE(RESAMPLER_DECORATE,_resampler_write_sample) +#define resampler_write_sample_fixed EVALUATE(RESAMPLER_DECORATE,_resampler_write_sample_fixed) +#define resampler_set_rate EVALUATE(RESAMPLER_DECORATE,_resampler_set_rate) +#define resampler_ready EVALUATE(RESAMPLER_DECORATE,_resampler_ready) +#define resampler_clear EVALUATE(RESAMPLER_DECORATE,_resampler_clear) +#define resampler_get_sample_count EVALUATE(RESAMPLER_DECORATE,_resampler_get_sample_count) +#define resampler_get_sample EVALUATE(RESAMPLER_DECORATE,_resampler_get_sample) +#define resampler_get_sample_float EVALUATE(RESAMPLER_DECORATE,_resampler_get_sample_float) +#define resampler_remove_sample EVALUATE(RESAMPLER_DECORATE,_resampler_remove_sample) +#endif + +void resampler_init(void); + +void * resampler_create(void); +void resampler_delete(void *); +void * resampler_dup(const void *); +void resampler_dup_inplace(void *, const void *); + +enum +{ + RESAMPLER_QUALITY_MIN = 0, /* alias of the lowest level (ZOH) */ + RESAMPLER_QUALITY_ZOH = 0, + RESAMPLER_QUALITY_BLEP = 1, + RESAMPLER_QUALITY_LINEAR = 2, +
RESAMPLER_QUALITY_BLAM = 3, /* sits between LINEAR and CUBIC */ + RESAMPLER_QUALITY_CUBIC = 4, + RESAMPLER_QUALITY_SINC = 5, + RESAMPLER_QUALITY_MAX = 5 /* alias of the highest level (SINC) */ +}; + +void resampler_set_quality(void *, int quality); + +int resampler_get_free_count(void *); +void resampler_write_sample(void *, short sample); +void resampler_write_sample_fixed(void *, int sample, unsigned char depth); +void resampler_set_rate( void *, double new_factor ); +int resampler_ready(void *); +void resampler_clear(void *); +int resampler_get_sample_count(void *); +int resampler_get_sample(void *); +float resampler_get_sample_float(void *); +void resampler_remove_sample(void *, int decay); /* decay: in the resampler.c hunk of this patch a non-zero value enables the BLEP/BLAM accumulator decay; presumably the same for this copy - confirm */ + +#endif diff --git a/Plugins/BASSMODS/BASSMODS/BASSDecoder.mm b/Plugins/BASSMODS/BASSMODS/BASSDecoder.mm index c9322a35f..e721bd399 100755 --- a/Plugins/BASSMODS/BASSMODS/BASSDecoder.mm +++ b/Plugins/BASSMODS/BASSMODS/BASSDecoder.mm @@ -106,6 +106,8 @@ static void SyncProc( HSYNC handle, DWORD channel, DWORD data, void *user ) resampling_int = 0; else if ([resampling isEqualToString:@"linear"]) resampling_int = 1; + else if ([resampling isEqualToString:@"blam"]) + resampling_int = 1; /* same value as "linear" and "cubic" in this decoder */ else if ([resampling isEqualToString:@"cubic"]) resampling_int = 1; else if ([resampling isEqualToString:@"sinc"]) diff --git a/Plugins/Dumb/DumbDecoder.m b/Plugins/Dumb/DumbDecoder.m index 3898eb2fa..57add2dc8 100755 --- a/Plugins/Dumb/DumbDecoder.m +++ b/Plugins/Dumb/DumbDecoder.m @@ -205,10 +205,12 @@ int callbackLoop(void *data) resampling_int = 1; else if ([resampling isEqualToString:@"linear"]) resampling_int = 2; - else if ([resampling isEqualToString:@"cubic"]) + else if ([resampling isEqualToString:@"blam"]) /* 3 is DUMB_RQ_BLAM in dumb.h */ resampling_int = 3; - else if ([resampling isEqualToString:@"sinc"]) + else if ([resampling isEqualToString:@"cubic"]) /* 4 is DUMB_RQ_CUBIC */ resampling_int = 4; + else if ([resampling isEqualToString:@"sinc"]) + resampling_int = 5; /* 5 is DUMB_RQ_FIR */ dumb_it_set_resampling_quality( itsr, resampling_int ); dumb_it_set_ramp_style(itsr, 2); diff --git a/Plugins/modplay/modplay/modDecoder.m
b/Plugins/modplay/modplay/modDecoder.m index a4df6baa2..da9b76218 100755 --- a/Plugins/modplay/modplay/modDecoder.m +++ b/Plugins/modplay/modplay/modDecoder.m @@ -190,10 +190,12 @@ BOOL xm_probe_length( unsigned long * intro_length, unsigned long * loop_length, resampling_int = 1; else if ([resampling isEqualToString:@"linear"]) resampling_int = 2; - else if ([resampling isEqualToString:@"cubic"]) + else if ([resampling isEqualToString:@"blam"]) /* 3..5 equal RESAMPLER_QUALITY_BLAM/CUBIC/SINC in resampler.h; presumably that enum is what this value feeds - confirm */ resampling_int = 3; - else if ([resampling isEqualToString:@"sinc"]) + else if ([resampling isEqualToString:@"cubic"]) resampling_int = 4; + else if ([resampling isEqualToString:@"sinc"]) + resampling_int = 5; if ( type == TYPE_S3M ) { diff --git a/Preferences/General/ResamplerBehaviorArrayController.m b/Preferences/General/ResamplerBehaviorArrayController.m index b980700d8..c6d91b5e9 100644 --- a/Preferences/General/ResamplerBehaviorArrayController.m +++ b/Preferences/General/ResamplerBehaviorArrayController.m @@ -28,6 +28,11 @@ NSLocalizedStringFromTableInBundle(@"Linear Interpolation", nil, [NSBundle bundleForClass:[self class]], @"") , @"name", @"linear", @"preference",nil]]; + [self addObject: + [NSDictionary dictionaryWithObjectsAndKeys: + NSLocalizedStringFromTableInBundle(@"Blam Synthesis", nil, [NSBundle bundleForClass:[self class]], @"") , @"name", + @"blam", @"preference",nil]]; /* "blam" is the string the decoder hunks in this patch compare the resampling preference against */ + [self addObject: [NSDictionary dictionaryWithObjectsAndKeys: NSLocalizedStringFromTableInBundle(@"Cubic Interpolation", nil, [NSBundle bundleForClass:[self class]], @"") , @"name",