From 040a61e1ed1b1240b864abefe47d5dd655cca9fd Mon Sep 17 00:00:00 2001 From: Christopher Snowhill Date: Sat, 11 Jun 2022 02:25:40 -0700 Subject: [PATCH] [HRIR Filter] Replace implementation with vDSP Work back to a vDSP implementation, this time using overlap-save instead of overlap-add, also accumulating the results as complex values, only inversing them once at the end, and finally, replacing the FFT method with the newer DFT API. Signed-off-by: Christopher Snowhill --- Audio/Chain/HeadphoneFilter.h | 14 +- Audio/Chain/HeadphoneFilter.mm | 267 ++++++++++++----- Audio/Chain/HeadphoneFilterPFFFT.h | 45 +++ Audio/Chain/HeadphoneFilterPFFFT.mm | 447 ++++++++++++++++++++++++++++ 4 files changed, 689 insertions(+), 84 deletions(-) create mode 100644 Audio/Chain/HeadphoneFilterPFFFT.h create mode 100644 Audio/Chain/HeadphoneFilterPFFFT.mm diff --git a/Audio/Chain/HeadphoneFilter.h b/Audio/Chain/HeadphoneFilter.h index b5f5fbaf1..94d269bc1 100644 --- a/Audio/Chain/HeadphoneFilter.h +++ b/Audio/Chain/HeadphoneFilter.h @@ -8,21 +8,23 @@ #ifndef HeadphoneFilter_h #define HeadphoneFilter_h +#import #import -#import "pffft.h" - @interface HeadphoneFilter : NSObject { - PFFFT_Setup *fftSetup; + vDSP_DFT_Setup dftSetupF; + vDSP_DFT_Setup dftSetupB; size_t fftSize; + size_t fftSizeOver2; size_t bufferSize; size_t paddedBufferSize; size_t channelCount; - float *workBuffer; - - float **impulse_responses; + DSPSplitComplex signal_fft; + DSPSplitComplex input_filtered_signal_per_channel[2]; + DSPSplitComplex input_filtered_signal_totals[2]; + DSPSplitComplex *impulse_responses; float **prevInputs; diff --git a/Audio/Chain/HeadphoneFilter.mm b/Audio/Chain/HeadphoneFilter.mm index cdb647ebd..fc69c18c0 100644 --- a/Audio/Chain/HeadphoneFilter.mm +++ b/Audio/Chain/HeadphoneFilter.mm @@ -17,7 +17,14 @@ #import "lpc.h" #import "util.h" -#import "pffft_double.h" +// Apparently _mm_malloc is Intel-only on newer macOS targets, so use supported posix_memalign +static void *_memalign_malloc(size_t size, size_t align) { + void *ret = NULL; + if(posix_memalign(&ret, align, size) != 0) { + return NULL; + } + return ret; +} @implementation HeadphoneFilter @@ -148,7 +155,7 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { NSDictionary *properties = [decoder properties]; - double sampleRateOfSource = [[properties objectForKey:@"sampleRate"] doubleValue]; + double sampleRateOfSource = [[properties objectForKey:@"sampleRate"] floatValue]; int sampleCount = [[properties objectForKey:@"totalFrames"] intValue]; int impulseChannels = [[properties objectForKey:@"channels"] intValue]; @@ -165,7 +172,7 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { return nil; } - float *impulseBuffer = (float *)pffft_aligned_malloc(sampleCount * sizeof(float) * impulseChannels); + float *impulseBuffer = (float *)_memalign_malloc(sampleCount * sizeof(float) * impulseChannels, 16); if(!impulseBuffer) { [decoder close]; decoder = nil; @@ -175,7 +182,6 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { } if([decoder readAudio:impulseBuffer frames:sampleCount] != sampleCount) { - pffft_aligned_free(impulseBuffer); [decoder close]; decoder = nil; [source close]; @@ -206,18 +212,18 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { int resamplerLatencyIn = (int)N_samples_to_add_; int resamplerLatencyOut = (int)N_samples_to_drop_; - float *tempImpulse = (float *)pffft_aligned_malloc((sampleCount + resamplerLatencyIn * 2 + 1024) * sizeof(float) * impulseChannels); + float *tempImpulse = (float *)_memalign_malloc((sampleCount + resamplerLatencyIn * 2 + 1024) * sizeof(float) * impulseChannels, 16); if(!tempImpulse) { - pffft_aligned_free(impulseBuffer); + free(impulseBuffer); return nil; } resampledCount += resamplerLatencyOut * 2 + 1024; - float *resampledImpulse = (float *)pffft_aligned_malloc(resampledCount * sizeof(float) * impulseChannels); + float *resampledImpulse = (float *)_memalign_malloc(resampledCount * sizeof(float) * impulseChannels, 16); if(!resampledImpulse) { - pffft_aligned_free(impulseBuffer); - pffft_aligned_free(tempImpulse); + free(tempImpulse); + free(impulseBuffer); return nil; } @@ -227,6 +233,7 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { size_t extrapolate_buffer_size = 0; memcpy(tempImpulse + resamplerLatencyIn * impulseChannels, impulseBuffer, sampleCount * sizeof(float) * impulseChannels); + free(impulseBuffer); lpc_extrapolate_bkwd(tempImpulse + N_samples_to_add_ * impulseChannels, sampleCount, prime, impulseChannels, LPC_ORDER, N_samples_to_add_, &extrapolate_buffer, &extrapolate_buffer_size); lpc_extrapolate_fwd(tempImpulse + N_samples_to_add_ * impulseChannels, sampleCount, prime, impulseChannels, LPC_ORDER, N_samples_to_add_, &extrapolate_buffer, &extrapolate_buffer_size); free(extrapolate_buffer); @@ -236,6 +243,8 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { outputDone = _r8bstate->resample(tempImpulse, sampleCount + N_samples_to_add_ * 2, &inputDone, resampledImpulse, resampledCount); + free(tempImpulse); + if (outputDone < resampledCount) { outputDone += _r8bstate->flush(resampledImpulse + outputDone * impulseChannels, resampledCount - outputDone); } @@ -246,8 +255,6 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { memmove(resampledImpulse, resampledImpulse + N_samples_to_drop_ * impulseChannels, outputDone * sizeof(float) * impulseChannels); - pffft_aligned_free(tempImpulse); - pffft_aligned_free(impulseBuffer); impulseBuffer = resampledImpulse; sampleCount = (int)outputDone; @@ -261,11 +268,16 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { bufferSize = 512; fftSize = sampleCount + bufferSize; - fftSize = (size_t)pffftd_next_power_of_two((int)fftSize); + int pow = 1; + while(fftSize > 2) { + pow++; + fftSize /= 2; + } + fftSize = 2 << pow; - float *deinterleavedImpulseBuffer = (float *)pffft_aligned_malloc(fftSize * sizeof(float) * impulseChannels); + float *deinterleavedImpulseBuffer = (float *)_memalign_malloc(fftSize * sizeof(float) * impulseChannels, 16); if(!deinterleavedImpulseBuffer) { - pffft_aligned_free(impulseBuffer); + free(impulseBuffer); return nil; } @@ -274,40 +286,78 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { vDSP_vclr(deinterleavedImpulseBuffer + i * fftSize + sampleCount, 1, fftSize - sampleCount); } - pffft_aligned_free(impulseBuffer); + free(impulseBuffer); paddedBufferSize = fftSize; + fftSizeOver2 = (fftSize + 1) / 2; - fftSetup = pffft_new_setup((int)fftSize, PFFFT_REAL); - if(!fftSetup) { - pffft_aligned_free(deinterleavedImpulseBuffer); + dftSetupF = vDSP_DFT_zrop_CreateSetup(nil, fftSize, vDSP_DFT_FORWARD); + dftSetupB = vDSP_DFT_zrop_CreateSetup(nil, fftSize, vDSP_DFT_INVERSE); + if(!dftSetupF || !dftSetupB) { + free(deinterleavedImpulseBuffer); return nil; } - workBuffer = (float *)pffft_aligned_malloc(sizeof(float) * fftSize); - if(!workBuffer) { - pffft_aligned_free(deinterleavedImpulseBuffer); - return nil; - } - - paddedSignal = (float *)pffft_aligned_malloc(sizeof(float) * paddedBufferSize); + paddedSignal = (float *)_memalign_malloc(sizeof(float) * paddedBufferSize, 16); if(!paddedSignal) { - pffft_aligned_free(deinterleavedImpulseBuffer); + free(deinterleavedImpulseBuffer); return nil; } - impulse_responses = (float **)calloc(sizeof(float *), channels * 2); + signal_fft.realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + signal_fft.imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + if(!signal_fft.realp || !signal_fft.imagp) { + free(deinterleavedImpulseBuffer); + return nil; + } + + input_filtered_signal_per_channel[0].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + input_filtered_signal_per_channel[0].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + if(!input_filtered_signal_per_channel[0].realp || + !input_filtered_signal_per_channel[0].imagp) { + free(deinterleavedImpulseBuffer); + return nil; + } + + input_filtered_signal_per_channel[1].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + input_filtered_signal_per_channel[1].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + if(!input_filtered_signal_per_channel[1].realp || + !input_filtered_signal_per_channel[1].imagp) { + free(deinterleavedImpulseBuffer); + return nil; + } + + input_filtered_signal_totals[0].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + input_filtered_signal_totals[0].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + if(!input_filtered_signal_totals[0].realp || + !input_filtered_signal_totals[0].imagp) { + free(deinterleavedImpulseBuffer); + return nil; + } + + input_filtered_signal_totals[1].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + input_filtered_signal_totals[1].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + if(!input_filtered_signal_totals[1].realp || + !input_filtered_signal_totals[1].imagp) { + free(deinterleavedImpulseBuffer); + return nil; + } + + impulse_responses = (DSPSplitComplex *)calloc(sizeof(DSPSplitComplex), channels * 2); if(!impulse_responses) { - pffft_aligned_free(deinterleavedImpulseBuffer); + free(deinterleavedImpulseBuffer); return nil; } for(size_t i = 0; i < channels; ++i) { - impulse_responses[i * 2 + 0] = (float *)pffft_aligned_malloc(sizeof(float) * fftSize * 2); - impulse_responses[i * 2 + 1] = (float *)pffft_aligned_malloc(sizeof(float) * fftSize * 2); + impulse_responses[i * 2 + 0].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + impulse_responses[i * 2 + 0].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + impulse_responses[i * 2 + 1].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); + impulse_responses[i * 2 + 1].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2, 16); - if(!impulse_responses[i * 2 + 0] || !impulse_responses[i * 2 + 1]) { - pffft_aligned_free(deinterleavedImpulseBuffer); + if(!impulse_responses[i * 2 + 0].realp || !impulse_responses[i * 2 + 0].imagp || + !impulse_responses[i * 2 + 1].realp || !impulse_responses[i * 2 + 1].imagp) { + free(deinterleavedImpulseBuffer); return nil; } @@ -330,45 +380,63 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { } if(leftInChannel == speaker_is_back_center || rightInChannel == speaker_is_back_center) { + float *temp; if(impulseChannels == 7) { - cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + 4 * fftSize, 1, impulse_responses[i * 2 + 0], 1); - vDSP_vadd(impulse_responses[i * 2 + 0], 1, deinterleavedImpulseBuffer + 5 * fftSize, 1, impulse_responses[i * 2 + 0], 1, fftSize); - cblas_scopy((int)fftSize, impulse_responses[i * 2 + 0], 1, impulse_responses[i * 2 + 1], 1); - } else { - cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + 4 * fftSize, 1, impulse_responses[i * 2 + 0], 1); - vDSP_vadd(impulse_responses[i * 2 + 0], 1, deinterleavedImpulseBuffer + 12 * fftSize, 1, impulse_responses[i * 2 + 0], 1, fftSize); + temp = (float *)malloc(sizeof(float) * fftSize); + if(!temp) { + free(deinterleavedImpulseBuffer); + return nil; + } - cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + 5 * fftSize, 1, impulse_responses[i * 2 + 1], 1); - vDSP_vadd(impulse_responses[i * 2 + 1], 1, deinterleavedImpulseBuffer + 11 * fftSize, 1, impulse_responses[i * 2 + 1], 1, fftSize); + cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + 4 * fftSize, 1, temp, 1); + vDSP_vadd(temp, 1, deinterleavedImpulseBuffer + 5 * fftSize, 1, temp, 1, fftSize); + + vDSP_ctoz((DSPComplex *)temp, 2, &impulse_responses[i * 2 + 0], 1, fftSizeOver2); + vDSP_ctoz((DSPComplex *)temp, 2, &impulse_responses[i * 2 + 1], 1, fftSizeOver2); + } else { + temp = (float *)malloc(sizeof(float) * fftSize * 2); + if(!temp) { + free(deinterleavedImpulseBuffer); + return nil; + } + + cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + 4 * fftSize, 1, temp, 1); + vDSP_vadd(temp, 1, deinterleavedImpulseBuffer + 12 * fftSize, 1, temp, 1, fftSize); + + cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + 5 * fftSize, 1, temp + fftSize, 1); + vDSP_vadd(temp + fftSize, 1, deinterleavedImpulseBuffer + 11 * fftSize, 1, temp + fftSize, 1, fftSize); + + vDSP_ctoz((DSPComplex *)temp, 2, &impulse_responses[i * 2 + 0], 1, fftSizeOver2); + vDSP_ctoz((DSPComplex *)(temp + fftSize), 2, &impulse_responses[i * 2 + 1], 1, fftSizeOver2); } + + free(temp); } else if(leftInChannel == speaker_not_present || rightInChannel == speaker_not_present) { - vDSP_vclr(impulse_responses[i * 2 + 0], 1, fftSize); - vDSP_vclr(impulse_responses[i * 2 + 1], 1, fftSize); + vDSP_ctoz((DSPComplex *)(deinterleavedImpulseBuffer + impulseChannels * fftSize), 2, &impulse_responses[i * 2 + 0], 1, fftSizeOver2); + vDSP_ctoz((DSPComplex *)(deinterleavedImpulseBuffer + impulseChannels * fftSize), 2, &impulse_responses[i * 2 + 1], 1, fftSizeOver2); } else { - cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + leftInChannel * fftSize, 1, impulse_responses[i * 2 + 0], 1); - cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + rightInChannel * fftSize, 1, impulse_responses[i * 2 + 1], 1); + vDSP_ctoz((DSPComplex *)(deinterleavedImpulseBuffer + leftInChannel * fftSize), 2, &impulse_responses[i * 2 + 0], 1, fftSizeOver2); + vDSP_ctoz((DSPComplex *)(deinterleavedImpulseBuffer + rightInChannel * fftSize), 2, &impulse_responses[i * 2 + 1], 1, fftSizeOver2); } - pffft_transform(fftSetup, impulse_responses[i * 2 + 0], impulse_responses[i * 2 + 0], workBuffer, PFFFT_FORWARD); - pffft_transform(fftSetup, impulse_responses[i * 2 + 1], impulse_responses[i * 2 + 1], workBuffer, PFFFT_FORWARD); + vDSP_DFT_Execute(dftSetupF, impulse_responses[i * 2 + 0].realp, impulse_responses[i * 2 + 0].imagp, impulse_responses[i * 2 + 0].realp, impulse_responses[i * 2 + 0].imagp); + vDSP_DFT_Execute(dftSetupF, impulse_responses[i * 2 + 1].realp, impulse_responses[i * 2 + 1].imagp, impulse_responses[i * 2 + 1].realp, impulse_responses[i * 2 + 1].imagp); } - pffft_aligned_free(deinterleavedImpulseBuffer); + free(deinterleavedImpulseBuffer); - left_result = (float *)pffft_aligned_malloc(sizeof(float) * fftSize); - right_result = (float *)pffft_aligned_malloc(sizeof(float) * fftSize); + left_result = (float *)_memalign_malloc(sizeof(float) * fftSize, 16); + right_result = (float *)_memalign_malloc(sizeof(float) * fftSize, 16); if(!left_result || !right_result) return nil; - prevInputs = (float **)calloc(sizeof(float *), channels); - if(!prevInputs) { + prevInputs = (float **)calloc(channels, sizeof(float *)); + if(!prevInputs) return nil; - } for(size_t i = 0; i < channels; ++i) { - prevInputs[i] = (float *)pffft_aligned_malloc(sizeof(float) * fftSize); - if(!prevInputs[i]) { + prevInputs[i] = (float *)_memalign_malloc(sizeof(float) * fftSize, 16); + if(!prevInputs[i]) return nil; - } vDSP_vclr(prevInputs[i], 1, fftSize); } } @@ -377,59 +445,102 @@ static const int8_t speakers_to_hesuvi_14[11][2] = { } - (void)dealloc { - if(fftSetup) pffft_destroy_setup(fftSetup); + if(dftSetupF) vDSP_DFT_DestroySetup(dftSetupF); + if(dftSetupB) vDSP_DFT_DestroySetup(dftSetupB); - pffft_aligned_free(workBuffer); + free(paddedSignal); - pffft_aligned_free(paddedSignal); + free(signal_fft.realp); + free(signal_fft.imagp); + + free(input_filtered_signal_per_channel[0].realp); + free(input_filtered_signal_per_channel[0].imagp); + free(input_filtered_signal_per_channel[1].realp); + free(input_filtered_signal_per_channel[1].imagp); + + free(input_filtered_signal_totals[0].realp); + free(input_filtered_signal_totals[0].imagp); + free(input_filtered_signal_totals[1].realp); + free(input_filtered_signal_totals[1].imagp); if(impulse_responses) { for(size_t i = 0; i < channelCount * 2; ++i) { - pffft_aligned_free(impulse_responses[i]); + free(impulse_responses[i].realp); + free(impulse_responses[i].imagp); } free(impulse_responses); } + free(left_result); + free(right_result); + if(prevInputs) { for(size_t i = 0; i < channelCount; ++i) { - pffft_aligned_free(prevInputs[i]); + free(prevInputs[i]); } free(prevInputs); } - - pffft_aligned_free(left_result); - pffft_aligned_free(right_result); } - (void)process:(const float *)inBuffer sampleCount:(size_t)count toBuffer:(float *)outBuffer { - const float scale = 1.0 / ((float)fftSize); + const float scale = 1.0 / (4.0 * (float)fftSize); while(count > 0) { const size_t countToDo = (count > bufferSize) ? bufferSize : count; - const size_t outOffset = fftSize - countToDo; + const size_t prevToDo = fftSize - countToDo; - vDSP_vclr(left_result, 1, fftSize); - vDSP_vclr(right_result, 1, fftSize); + vDSP_vclr(input_filtered_signal_totals[0].realp, 1, fftSizeOver2); + vDSP_vclr(input_filtered_signal_totals[0].imagp, 1, fftSizeOver2); + vDSP_vclr(input_filtered_signal_totals[1].realp, 1, fftSizeOver2); + vDSP_vclr(input_filtered_signal_totals[1].imagp, 1, fftSizeOver2); for(size_t i = 0; i < channelCount; ++i) { - cblas_scopy((int)outOffset, prevInputs[i] + countToDo, 1, paddedSignal, 1); - cblas_scopy((int)countToDo, inBuffer + i, (int)channelCount, paddedSignal + outOffset, 1); + cblas_scopy((int)prevToDo, prevInputs[i] + countToDo, 1, paddedSignal, 1); + cblas_scopy((int)countToDo, inBuffer + i, (int)channelCount, paddedSignal + prevToDo, 1); cblas_scopy((int)fftSize, paddedSignal, 1, prevInputs[i], 1); - pffft_transform(fftSetup, paddedSignal, paddedSignal, workBuffer, PFFFT_FORWARD); + vDSP_ctoz((DSPComplex *)paddedSignal, 2, &signal_fft, 1, fftSizeOver2); - pffft_zconvolve_accumulate(fftSetup, paddedSignal, impulse_responses[i * 2 + 0], left_result, 1.0); - pffft_zconvolve_accumulate(fftSetup, paddedSignal, impulse_responses[i * 2 + 1], right_result, 1.0); + vDSP_DFT_Execute(dftSetupF, signal_fft.realp, signal_fft.imagp, signal_fft.realp, signal_fft.imagp); + + // One channel forward, then multiply and back twice + + float preserveIRNyq = impulse_responses[i * 2 + 0].imagp[0]; + float preserveSigNyq = signal_fft.imagp[0]; + impulse_responses[i * 2 + 0].imagp[0] = 0; + signal_fft.imagp[0] = 0; + + vDSP_zvmul(&signal_fft, 1, &impulse_responses[i * 2 + 0], 1, &input_filtered_signal_per_channel[0], 1, fftSizeOver2, 1); + + input_filtered_signal_per_channel[0].imagp[0] = preserveIRNyq * preserveSigNyq; + impulse_responses[i * 2 + 0].imagp[0] = preserveIRNyq; + + preserveIRNyq = impulse_responses[i * 2 + 1].imagp[0]; + impulse_responses[i * 2 + 1].imagp[0] = 0; + + vDSP_zvmul(&signal_fft, 1, &impulse_responses[i * 2 + 1], 1, &input_filtered_signal_per_channel[1], 1, fftSizeOver2, 1); + + input_filtered_signal_per_channel[1].imagp[0] = preserveIRNyq * preserveSigNyq; + impulse_responses[i * 2 + 1].imagp[0] = preserveIRNyq; + + vDSP_zvadd(&input_filtered_signal_totals[0], 1, &input_filtered_signal_per_channel[0], 1, &input_filtered_signal_totals[0], 1, fftSizeOver2); + vDSP_zvadd(&input_filtered_signal_totals[1], 1, &input_filtered_signal_per_channel[1], 1, &input_filtered_signal_totals[1], 1, fftSizeOver2); } - pffft_transform(fftSetup, left_result, left_result, workBuffer, PFFFT_BACKWARD); - pffft_transform(fftSetup, right_result, right_result, workBuffer, PFFFT_BACKWARD); + vDSP_DFT_Execute(dftSetupB, input_filtered_signal_totals[0].realp, input_filtered_signal_totals[0].imagp, input_filtered_signal_totals[0].realp, input_filtered_signal_totals[0].imagp); + vDSP_DFT_Execute(dftSetupB, input_filtered_signal_totals[1].realp, input_filtered_signal_totals[1].imagp, input_filtered_signal_totals[1].realp, input_filtered_signal_totals[1].imagp); - vDSP_vsmul(left_result + outOffset, 1, &scale, left_result + outOffset, 1, countToDo); - vDSP_vsmul(right_result + outOffset, 1, &scale, right_result + outOffset, 1, countToDo); + vDSP_ztoc(&input_filtered_signal_totals[0], 1, (DSPComplex *)left_result, 2, fftSizeOver2); + vDSP_ztoc(&input_filtered_signal_totals[1], 1, (DSPComplex *)right_result, 2, fftSizeOver2); - cblas_scopy((int)countToDo, left_result + outOffset, 1, outBuffer + 0, 2); - cblas_scopy((int)countToDo, right_result + outOffset, 1, outBuffer + 1, 2); + float *left_ptr = left_result + prevToDo; + float *right_ptr = right_result + prevToDo; + + vDSP_vsmul(left_ptr, 1, &scale, left_ptr, 1, countToDo); + vDSP_vsmul(right_ptr, 1, &scale, right_ptr, 1, countToDo); + + cblas_scopy((int)countToDo, left_ptr, 1, outBuffer + 0, 2); + cblas_scopy((int)countToDo, right_ptr, 1, outBuffer + 1, 2); inBuffer += countToDo * channelCount; outBuffer += countToDo * 2; diff --git a/Audio/Chain/HeadphoneFilterPFFFT.h b/Audio/Chain/HeadphoneFilterPFFFT.h new file mode 100644 index 000000000..b5f5fbaf1 --- /dev/null +++ b/Audio/Chain/HeadphoneFilterPFFFT.h @@ -0,0 +1,45 @@ +// +// HeadphoneFilter.h +// CogAudio Framework +// +// Created by Christopher Snowhill on 1/24/22. +// + +#ifndef HeadphoneFilter_h +#define HeadphoneFilter_h + +#import + +#import "pffft.h" + +@interface HeadphoneFilter : NSObject { + PFFFT_Setup *fftSetup; + + size_t fftSize; + size_t bufferSize; + size_t paddedBufferSize; + size_t channelCount; + + float *workBuffer; + + float **impulse_responses; + + float **prevInputs; + + float *left_result; + float *right_result; + + float *paddedSignal; +} + ++ (BOOL)validateImpulseFile:(NSURL *)url; + +- (id)initWithImpulseFile:(NSURL *)url forSampleRate:(double)sampleRate withInputChannels:(size_t)channels withConfig:(uint32_t)config; + +- (void)process:(const float *)inBuffer sampleCount:(size_t)count toBuffer:(float *)outBuffer; + +- (void)reset; + +@end + +#endif /* HeadphoneFilter_h */ diff --git a/Audio/Chain/HeadphoneFilterPFFFT.mm b/Audio/Chain/HeadphoneFilterPFFFT.mm new file mode 100644 index 000000000..65355e8ac --- /dev/null +++ b/Audio/Chain/HeadphoneFilterPFFFT.mm @@ -0,0 +1,447 @@ +// +// HeadphoneFilter.m +// CogAudio Framework +// +// Created by Christopher Snowhill on 1/24/22. +// + +#import "AudioChunk.h" +#import "AudioDecoder.h" +#import "AudioSource.h" +#import "HeadphoneFilter.h" + +#import + +#import "r8bstate.h" + +#import "lpc.h" +#import "util.h" + +#import "pffft_double.h" + +@implementation HeadphoneFilter + +enum { + speaker_is_back_center = -1, + speaker_not_present = -2, +}; + +static const uint32_t max_speaker_index = 10; + +static const int8_t speakers_to_hesuvi_7[11][2] = { + // front left + { 0, 1 }, + // front right + { 1, 0 }, + // front center + { 6, 6 }, + // lfe + { 6, 6 }, + // back left + { 4, 5 }, + // back right + { 5, 4 }, + // front center left + { speaker_not_present, speaker_not_present }, + // front center right + { speaker_not_present, speaker_not_present }, + // back center + { speaker_is_back_center, speaker_is_back_center }, + // side left + { 2, 3 }, + // side right + { 3, 2 } +}; + +static const int8_t speakers_to_hesuvi_14[11][2] = { + // front left + { 0, 1 }, + // front right + { 8, 7 }, + // front center + { 6, 13 }, + // lfe + { 6, 13 }, + // back left + { 4, 5 }, + // back right + { 12, 11 }, + // front center left + { speaker_not_present, speaker_not_present }, + // front center right + { speaker_not_present, speaker_not_present }, + // back center + { speaker_is_back_center, speaker_is_back_center }, + // side left + { 2, 3 }, + // side right + { 10, 9 } +}; + ++ (BOOL)validateImpulseFile:(NSURL *)url { + id source = [AudioSource audioSourceForURL:url]; + if(!source) + return NO; + + if(![source open:url]) + return NO; + + id decoder = [AudioDecoder audioDecoderForSource:source]; + + if(decoder == nil) { + [source close]; + source = nil; + return NO; + } + + if(![decoder open:source]) { + decoder = nil; + [source close]; + source = nil; + return NO; + } + + NSDictionary *properties = [decoder properties]; + + [decoder close]; + decoder = nil; + [source close]; + source = nil; + + int impulseChannels = [[properties objectForKey:@"channels"] intValue]; + + if([[properties objectForKey:@"floatingPoint"] boolValue] != YES || + [[properties objectForKey:@"bitsPerSample"] intValue] != 32 || + !([[properties objectForKey:@"endian"] isEqualToString:@"host"] || + [[properties objectForKey:@"endian"] isEqualToString:@"little"]) || + (impulseChannels != 14 && impulseChannels != 7)) + return NO; + + return YES; +} + +- (id)initWithImpulseFile:(NSURL *)url forSampleRate:(double)sampleRate withInputChannels:(size_t)channels withConfig:(uint32_t)config { + self = [super init]; + + if(self) { + id source = [AudioSource audioSourceForURL:url]; + if(!source) + return nil; + + if(![source open:url]) + return nil; + + id decoder = [AudioDecoder audioDecoderForSource:source]; + + if(decoder == nil) { + [source close]; + source = nil; + return nil; + } + + if(![decoder open:source]) { + decoder = nil; + [source close]; + source = nil; + return nil; + } + + NSDictionary *properties = [decoder properties]; + + double sampleRateOfSource = [[properties objectForKey:@"sampleRate"] doubleValue]; + + int sampleCount = [[properties objectForKey:@"totalFrames"] intValue]; + int impulseChannels = [[properties objectForKey:@"channels"] intValue]; + + if([[properties objectForKey:@"floatingPoint"] boolValue] != YES || + [[properties objectForKey:@"bitsPerSample"] intValue] != 32 || + !([[properties objectForKey:@"endian"] isEqualToString:@"host"] || + [[properties objectForKey:@"endian"] isEqualToString:@"little"]) || + (impulseChannels != 14 && impulseChannels != 7)) { + [decoder close]; + decoder = nil; + [source close]; + source = nil; + return nil; + } + + float *impulseBuffer = (float *)pffft_aligned_malloc(sampleCount * sizeof(float) * impulseChannels); + if(!impulseBuffer) { + [decoder close]; + decoder = nil; + [source close]; + source = nil; + return nil; + } + + if([decoder readAudio:impulseBuffer frames:sampleCount] != sampleCount) { + pffft_aligned_free(impulseBuffer); + [decoder close]; + decoder = nil; + [source close]; + source = nil; + return nil; + } + + [decoder close]; + decoder = nil; + [source close]; + source = nil; + + if(sampleRateOfSource != sampleRate) { + double sampleRatio = sampleRate / sampleRateOfSource; + int resampledCount = (int)ceil((double)sampleCount * sampleRatio); + + r8bstate *_r8bstate = new r8bstate(impulseChannels, 1024, sampleRateOfSource, sampleRate); + + unsigned long PRIME_LEN_ = MAX(sampleRateOfSource / 20, 1024u); + PRIME_LEN_ = MIN(PRIME_LEN_, 16384u); + PRIME_LEN_ = MAX(PRIME_LEN_, 2 * LPC_ORDER + 1); + + unsigned int N_samples_to_add_ = sampleRateOfSource; + unsigned int N_samples_to_drop_ = sampleRate; + + samples_len(&N_samples_to_add_, &N_samples_to_drop_, 20, 8192u); + + int resamplerLatencyIn = (int)N_samples_to_add_; + int resamplerLatencyOut = (int)N_samples_to_drop_; + + float *tempImpulse = (float *)pffft_aligned_malloc((sampleCount + resamplerLatencyIn * 2 + 1024) * sizeof(float) * impulseChannels); + if(!tempImpulse) { + pffft_aligned_free(impulseBuffer); + return nil; + } + + resampledCount += resamplerLatencyOut * 2 + 1024; + + float *resampledImpulse = (float *)pffft_aligned_malloc(resampledCount * sizeof(float) * impulseChannels); + if(!resampledImpulse) { + pffft_aligned_free(impulseBuffer); + pffft_aligned_free(tempImpulse); + return nil; + } + + size_t prime = MIN(sampleCount, PRIME_LEN_); + + void *extrapolate_buffer = NULL; + size_t extrapolate_buffer_size = 0; + + memcpy(tempImpulse + resamplerLatencyIn * impulseChannels, impulseBuffer, sampleCount * sizeof(float) * impulseChannels); + lpc_extrapolate_bkwd(tempImpulse + N_samples_to_add_ * impulseChannels, sampleCount, prime, impulseChannels, LPC_ORDER, N_samples_to_add_, &extrapolate_buffer, &extrapolate_buffer_size); + lpc_extrapolate_fwd(tempImpulse + N_samples_to_add_ * impulseChannels, sampleCount, prime, impulseChannels, LPC_ORDER, N_samples_to_add_, &extrapolate_buffer, &extrapolate_buffer_size); + free(extrapolate_buffer); + + size_t inputDone = 0; + size_t outputDone = 0; + + outputDone = _r8bstate->resample(tempImpulse, sampleCount + N_samples_to_add_ * 2, &inputDone, resampledImpulse, resampledCount); + + if(outputDone < resampledCount) { + outputDone += _r8bstate->flush(resampledImpulse + outputDone * impulseChannels, resampledCount - outputDone); + } + + delete _r8bstate; + + outputDone -= N_samples_to_drop_ * 2; + + memmove(resampledImpulse, resampledImpulse + N_samples_to_drop_ * impulseChannels, outputDone * sizeof(float) * impulseChannels); + + pffft_aligned_free(tempImpulse); + pffft_aligned_free(impulseBuffer); + impulseBuffer = resampledImpulse; + sampleCount = (int)outputDone; + + // Normalize resampled impulse by sample ratio + float fSampleRatio = (float)sampleRatio; + vDSP_vsdiv(impulseBuffer, 1, &fSampleRatio, impulseBuffer, 1, sampleCount * impulseChannels); + } + + channelCount = channels; + + bufferSize = 512; + fftSize = sampleCount + bufferSize; + + fftSize = (size_t)pffftd_next_power_of_two((int)fftSize); + + float *deinterleavedImpulseBuffer = (float *)pffft_aligned_malloc(fftSize * sizeof(float) * impulseChannels); + if(!deinterleavedImpulseBuffer) { + pffft_aligned_free(impulseBuffer); + return nil; + } + + for(size_t i = 0; i < impulseChannels; ++i) { + cblas_scopy(sampleCount, impulseBuffer + i, impulseChannels, deinterleavedImpulseBuffer + i * fftSize, 1); + vDSP_vclr(deinterleavedImpulseBuffer + i * fftSize + sampleCount, 1, fftSize - sampleCount); + } + + pffft_aligned_free(impulseBuffer); + + paddedBufferSize = fftSize; + + fftSetup = pffft_new_setup((int)fftSize, PFFFT_REAL); + if(!fftSetup) { + pffft_aligned_free(deinterleavedImpulseBuffer); + return nil; + } + + workBuffer = (float *)pffft_aligned_malloc(sizeof(float) * fftSize); + if(!workBuffer) { + pffft_aligned_free(deinterleavedImpulseBuffer); + return nil; + } + + paddedSignal = (float *)pffft_aligned_malloc(sizeof(float) * paddedBufferSize); + if(!paddedSignal) { + pffft_aligned_free(deinterleavedImpulseBuffer); + return nil; + } + + impulse_responses = (float **)calloc(sizeof(float *), channels * 2); + if(!impulse_responses) { + pffft_aligned_free(deinterleavedImpulseBuffer); + return nil; + } + + for(size_t i = 0; i < channels; ++i) { + impulse_responses[i * 2 + 0] = (float *)pffft_aligned_malloc(sizeof(float) * fftSize * 2); + impulse_responses[i * 2 + 1] = (float *)pffft_aligned_malloc(sizeof(float) * fftSize * 2); + + if(!impulse_responses[i * 2 + 0] || !impulse_responses[i * 2 + 1]) { + pffft_aligned_free(deinterleavedImpulseBuffer); + return nil; + } + + uint32_t channelFlag = [AudioChunk extractChannelFlag:(uint32_t)i fromConfig:config]; + uint32_t channelIndex = [AudioChunk findChannelIndex:channelFlag]; + + int leftInChannel = speaker_not_present; + int rightInChannel = speaker_not_present; + + if(impulseChannels == 7) { + if(channelIndex <= max_speaker_index) { + leftInChannel = speakers_to_hesuvi_7[channelIndex][0]; + rightInChannel = speakers_to_hesuvi_7[channelIndex][1]; + } + } else { + if(channelIndex <= max_speaker_index) { + leftInChannel = speakers_to_hesuvi_14[channelIndex][0]; + rightInChannel = speakers_to_hesuvi_14[channelIndex][1]; + } + } + + if(leftInChannel == speaker_is_back_center || rightInChannel == speaker_is_back_center) { + if(impulseChannels == 7) { + cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + 4 * fftSize, 1, impulse_responses[i * 2 + 0], 1); + vDSP_vadd(impulse_responses[i * 2 + 0], 1, deinterleavedImpulseBuffer + 5 * fftSize, 1, impulse_responses[i * 2 + 0], 1, fftSize); + cblas_scopy((int)fftSize, impulse_responses[i * 2 + 0], 1, impulse_responses[i * 2 + 1], 1); + } else { + cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + 4 * fftSize, 1, impulse_responses[i * 2 + 0], 1); + vDSP_vadd(impulse_responses[i * 2 + 0], 1, deinterleavedImpulseBuffer + 12 * fftSize, 1, impulse_responses[i * 2 + 0], 1, fftSize); + + cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + 5 * fftSize, 1, impulse_responses[i * 2 + 1], 1); + vDSP_vadd(impulse_responses[i * 2 + 1], 1, deinterleavedImpulseBuffer + 11 * fftSize, 1, impulse_responses[i * 2 + 1], 1, fftSize); + } + } else if(leftInChannel == speaker_not_present || rightInChannel == speaker_not_present) { + vDSP_vclr(impulse_responses[i * 2 + 0], 1, fftSize); + vDSP_vclr(impulse_responses[i * 2 + 1], 1, fftSize); + } else { + cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + leftInChannel * fftSize, 1, impulse_responses[i * 2 + 0], 1); + cblas_scopy((int)fftSize, deinterleavedImpulseBuffer + rightInChannel * fftSize, 1, impulse_responses[i * 2 + 1], 1); + } + + pffft_transform(fftSetup, impulse_responses[i * 2 + 0], impulse_responses[i * 2 + 0], workBuffer, PFFFT_FORWARD); + pffft_transform(fftSetup, impulse_responses[i * 2 + 1], impulse_responses[i * 2 + 1], workBuffer, PFFFT_FORWARD); + } + + pffft_aligned_free(deinterleavedImpulseBuffer); + + left_result = (float *)pffft_aligned_malloc(sizeof(float) * fftSize); + right_result = (float *)pffft_aligned_malloc(sizeof(float) * fftSize); + if(!left_result || !right_result) + return nil; + + prevInputs = (float **)calloc(sizeof(float *), channels); + if(!prevInputs) { + return nil; + } + for(size_t i = 0; i < channels; ++i) { + prevInputs[i] = (float *)pffft_aligned_malloc(sizeof(float) * fftSize); + if(!prevInputs[i]) { + return nil; + } + vDSP_vclr(prevInputs[i], 1, fftSize); + } + } + + return self; +} + +- (void)dealloc { + if(fftSetup) pffft_destroy_setup(fftSetup); + + pffft_aligned_free(workBuffer); + + pffft_aligned_free(paddedSignal); + + if(impulse_responses) { + for(size_t i = 0; i < channelCount * 2; ++i) { + pffft_aligned_free(impulse_responses[i]); + } + free(impulse_responses); + } + + if(prevInputs) { + for(size_t i = 0; i < channelCount; ++i) { + pffft_aligned_free(prevInputs[i]); + } + free(prevInputs); + } + + pffft_aligned_free(left_result); + pffft_aligned_free(right_result); +} + +- (void)process:(const float *)inBuffer sampleCount:(size_t)count toBuffer:(float *)outBuffer { + const float scale = 1.0 / ((float)fftSize); + + while(count > 0) { + const size_t countToDo = (count > bufferSize) ? bufferSize : count; + const size_t outOffset = fftSize - countToDo; + + vDSP_vclr(left_result, 1, fftSize); + vDSP_vclr(right_result, 1, fftSize); + + for(size_t i = 0; i < channelCount; ++i) { + cblas_scopy((int)outOffset, prevInputs[i] + countToDo, 1, paddedSignal, 1); + cblas_scopy((int)countToDo, inBuffer + i, (int)channelCount, paddedSignal + outOffset, 1); + cblas_scopy((int)fftSize, paddedSignal, 1, prevInputs[i], 1); + + pffft_transform(fftSetup, paddedSignal, paddedSignal, workBuffer, PFFFT_FORWARD); + + pffft_zconvolve_accumulate(fftSetup, paddedSignal, impulse_responses[i * 2 + 0], left_result, 1.0); + pffft_zconvolve_accumulate(fftSetup, paddedSignal, impulse_responses[i * 2 + 1], right_result, 1.0); + } + + pffft_transform(fftSetup, left_result, left_result, workBuffer, PFFFT_BACKWARD); + pffft_transform(fftSetup, right_result, right_result, workBuffer, PFFFT_BACKWARD); + + vDSP_vsmul(left_result + outOffset, 1, &scale, left_result + outOffset, 1, countToDo); + vDSP_vsmul(right_result + outOffset, 1, &scale, right_result + outOffset, 1, countToDo); + + cblas_scopy((int)countToDo, left_result + outOffset, 1, outBuffer + 0, 2); + cblas_scopy((int)countToDo, right_result + outOffset, 1, outBuffer + 1, 2); + + inBuffer += countToDo * channelCount; + outBuffer += countToDo * 2; + + count -= countToDo; + } +} + +- (void)reset { + for(size_t i = 0; i < channelCount; ++i) { + vDSP_vclr(prevInputs[i], 1, fftSize); + } +} + +@end