cog/Audio/Output/HeadphoneFilter.mm

517 lines
18 KiB
Plaintext

//
// HeadphoneFilter.m
// CogAudio Framework
//
// Created by Christopher Snowhill on 1/24/22.
//
#import "HeadphoneFilter.h"
#import "AudioChunk.h"
#import "AudioDecoder.h"
#import "AudioSource.h"
#import <stdlib.h>
#import <fstream>
#import "rsstate.h"
#import "HrtfData.h"
#import "Logging.h"
typedef struct speakerPosition {
float elevation;
float azimuth;
float distance;
} speakerPosition;
#define DEGREES(x) ((x)*M_PI / 180.0)
static const speakerPosition speakerPositions[18] = {
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(-30.0), .distance = 1.0 },
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(+30.0), .distance = 1.0 },
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(0.0), .distance = 1.0 },
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(0.0), .distance = 1.0 },
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(-135.0), .distance = 1.0 },
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(+135.0), .distance = 1.0 },
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(-15.0), .distance = 1.0 },
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(+15.0), .distance = 1.0 },
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(-180.0), .distance = 1.0 },
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(-90.0), .distance = 1.0 },
{ .elevation = DEGREES(0.0), .azimuth = DEGREES(+90.0), .distance = 1.0 },
{ .elevation = DEGREES(+90.0), .azimuth = DEGREES(0.0), .distance = 1.0 },
{ .elevation = DEGREES(+45.0), .azimuth = DEGREES(-30.0), .distance = 1.0 },
{ .elevation = DEGREES(+45.0), .azimuth = DEGREES(0.0), .distance = 1.0 },
{ .elevation = DEGREES(+45.0), .azimuth = DEGREES(+30.0), .distance = 1.0 },
{ .elevation = DEGREES(+45.0), .azimuth = DEGREES(-135.0), .distance = 1.0 },
{ .elevation = DEGREES(+45.0), .azimuth = DEGREES(0.0), .distance = 1.0 },
{ .elevation = DEGREES(+45.0), .azimuth = DEGREES(+135.0), .distance = 1.0 }
};
@interface impulseCacheObject : NSObject {
}
@property NSURL *URL;
@property int sampleCount;
@property int channelCount;
@property uint32_t channelConfig;
@property double sampleRate;
@property double targetSampleRate;
@property NSData *data;
@end
@implementation impulseCacheObject
@synthesize URL;
@synthesize sampleCount;
@synthesize channelCount;
@synthesize channelConfig;
@synthesize sampleRate;
@synthesize targetSampleRate;
@synthesize data;
@end
@interface impulseCache : NSObject {
}
@property NSMutableArray<impulseCacheObject *> *cacheObjects;
+ (impulseCache *)sharedController;
- (const float *)getImpulse:(NSURL *)url sampleCount:(int *)sampleCount channelCount:(int)channelCount channelConfig:(uint32_t)channelConfig sampleRate:(double)sampleRate;
@end
// Apparently _mm_malloc is Intel-only on newer macOS targets, so use supported posix_memalign
static void *_memalign_malloc(size_t size, size_t align) {
void *ret = NULL;
if(posix_memalign(&ret, align, size) != 0) {
return NULL;
}
return ret;
}
@implementation impulseCache
static impulseCache *_sharedController = nil;
+ (impulseCache *)sharedController {
@synchronized(self) {
if(!_sharedController) {
_sharedController = [[impulseCache alloc] init];
}
}
return _sharedController;
}
- (id)init {
self = [super init];
if(self) {
self.cacheObjects = [[NSMutableArray alloc] init];
}
return self;
}
- (impulseCacheObject *)addImpulse:(NSURL *)url sampleCount:(int)sampleCount channelCount:(int)channelCount channelConfig:(uint32_t)channelConfig originalSampleRate:(double)originalSampleRate targetSampleRate:(double)targetSampleRate impulseBuffer:(const float *)impulseBuffer {
impulseCacheObject *obj = [[impulseCacheObject alloc] init];
obj.URL = url;
obj.sampleCount = sampleCount;
obj.channelCount = channelCount;
obj.sampleRate = originalSampleRate;
obj.targetSampleRate = targetSampleRate;
obj.data = [NSData dataWithBytes:impulseBuffer length:(sampleCount * channelCount * sizeof(float) * 2)];
@synchronized(self.cacheObjects) {
[self.cacheObjects addObject:obj];
}
return obj;
}
- (const float *)getImpulse:(NSURL *)url sampleCount:(int *)retSampleCount channelCount:(int)channelCount channelConfig:(uint32_t)channelConfig sampleRate:(double)sampleRate {
BOOL impulseFound = NO;
const float *impulseData = NULL;
double sampleRateOfSource = 0;
int sampleCount = 0;
impulseCacheObject *cacheObject = nil;
@synchronized(self.cacheObjects) {
for(impulseCacheObject *obj in self.cacheObjects) {
if([obj.URL isEqualTo:url] &&
obj.targetSampleRate == sampleRate &&
obj.channelCount == channelCount &&
obj.channelConfig == channelConfig) {
*retSampleCount = obj.sampleCount;
return (const float *)[obj.data bytes];
}
}
for(impulseCacheObject *obj in self.cacheObjects) {
if([obj.URL isEqualTo:url] &&
obj.sampleRate == obj.targetSampleRate &&
obj.channelCount == channelCount &&
obj.channelConfig == channelConfig) {
impulseData = (const float *)[obj.data bytes];
sampleCount = obj.sampleCount;
sampleRateOfSource = obj.sampleRate;
impulseFound = YES;
break;
}
}
}
if(!impulseFound) {
NSString *filePath = [url path];
try {
std::ifstream file([filePath UTF8String], std::fstream::binary);
if(!file.is_open()) {
throw std::logic_error("Cannot open file.");
}
HrtfData data(file);
file.close();
sampleRateOfSource = data.get_sample_rate();
uint32_t sampleCountExact = data.get_response_length();
sampleCount = sampleCountExact + ((data.get_longest_delay() + 2) >> 2);
std::vector<float> hrtfData(sampleCount * channelCount * 2, 0.0);
for(uint32_t i = 0; i < channelCount; ++i) {
uint32_t channelFlag = [AudioChunk extractChannelFlag:i fromConfig:channelConfig];
uint32_t channelNumber = [AudioChunk findChannelIndex:channelFlag];
if(channelNumber < 18) {
const speakerPosition &speaker = speakerPositions[channelNumber];
DirectionData hrtfLeft;
DirectionData hrtfRight;
data.get_direction_data(speaker.elevation, speaker.azimuth, speaker.distance, hrtfLeft, hrtfRight);
cblas_scopy(sampleCountExact, &hrtfLeft.impulse_response[0], 1, &hrtfData[((hrtfLeft.delay + 2) >> 2) * channelCount * 2 + i * 2], channelCount * 2);
cblas_scopy(sampleCountExact, &hrtfRight.impulse_response[0], 1, &hrtfData[((hrtfLeft.delay + 2) >> 2) * channelCount * 2 + i * 2 + 1], channelCount * 2);
}
}
cacheObject = [self addImpulse:url sampleCount:sampleCount channelCount:channelCount channelConfig:channelConfig originalSampleRate:sampleRateOfSource targetSampleRate:sampleRateOfSource impulseBuffer:&hrtfData[0]];
impulseData = (const float *)[cacheObject.data bytes];
} catch(std::exception &e) {
ALog(@"Exception caught: %s", e.what());
return nil;
}
}
if(sampleRateOfSource != sampleRate) {
double sampleRatio = sampleRate / sampleRateOfSource;
int resampledCount = (int)ceil((double)sampleCount * sampleRatio);
void *rsstate = rsstate_new(channelCount * 2, sampleRateOfSource, sampleRate);
float *resampledImpulse = (float *)_memalign_malloc(resampledCount * sizeof(float) * channelCount * 2, 16);
if(!resampledImpulse) {
rsstate_delete(rsstate);
return nil;
}
size_t inputDone = 0;
size_t outputDone = 0;
outputDone = rsstate_resample(rsstate, impulseData, sampleCount, &inputDone, resampledImpulse, resampledCount);
while(outputDone < resampledCount) {
outputDone += rsstate_flush(rsstate, resampledImpulse + outputDone * channelCount * 2, resampledCount - outputDone);
}
rsstate_delete(rsstate);
sampleCount = (int)outputDone;
// Normalize resampled impulse by sample ratio
float fSampleRatio = (float)sampleRatio;
vDSP_vsdiv(resampledImpulse, 1, &fSampleRatio, resampledImpulse, 1, sampleCount * channelCount * 2);
cacheObject = [self addImpulse:url sampleCount:sampleCount channelCount:channelCount channelConfig:channelConfig originalSampleRate:sampleRateOfSource targetSampleRate:sampleRate impulseBuffer:resampledImpulse];
free(resampledImpulse);
impulseData = (const float *)[cacheObject.data bytes];
}
*retSampleCount = sampleCount;
return impulseData;
}
@end
@implementation HeadphoneFilter
+ (BOOL)validateImpulseFile:(NSURL *)url {
NSString *filePath = [url path];
try {
std::ifstream file([filePath UTF8String], std::fstream::binary);
if(!file.is_open()) {
throw std::logic_error("Cannot open file.");
}
HrtfData data(file);
file.close();
return YES;
} catch(std::exception &e) {
ALog(@"Exception thrown: %s", e.what());
return NO;
}
}
- (id)initWithImpulseFile:(NSURL *)url forSampleRate:(double)sampleRate withInputChannels:(int)channels withConfig:(uint32_t)config {
self = [super init];
if(self) {
int sampleCount = 0;
const float *impulseBuffer = [[impulseCache sharedController] getImpulse:url sampleCount:&sampleCount channelCount:channels channelConfig:config sampleRate:sampleRate];
if(!impulseBuffer) {
return nil;
}
channelCount = channels;
bufferSize = 512;
fftSize = sampleCount + bufferSize;
int pow = 1;
while(fftSize > 2) {
pow++;
fftSize /= 2;
}
fftSize = 2 << pow;
float *deinterleavedImpulseBuffer = (float *)_memalign_malloc(fftSize * sizeof(float) * channelCount * 2, 16);
if(!deinterleavedImpulseBuffer) {
return nil;
}
for(int i = 0; i < channelCount; ++i) {
cblas_scopy(sampleCount, impulseBuffer + i * 2, (int)channelCount * 2, deinterleavedImpulseBuffer + i * fftSize * 2, 1);
vDSP_vclr(deinterleavedImpulseBuffer + i * fftSize * 2 + sampleCount, 1, fftSize - sampleCount);
cblas_scopy(sampleCount, impulseBuffer + i * 2 + 1, (int)channelCount * 2, deinterleavedImpulseBuffer + i * fftSize * 2 + fftSize, 1);
vDSP_vclr(deinterleavedImpulseBuffer + i * fftSize * 2 + fftSize + sampleCount, 1, fftSize - sampleCount);
}
paddedBufferSize = fftSize;
fftSizeOver2 = (fftSize + 1) / 2;
const int fftSizeOver2Plus1 = fftSizeOver2 + 1; // DFT float overwrites plus one, double doesn't
dftSetupF = vDSP_DFT_zrop_CreateSetup(nil, fftSize, vDSP_DFT_FORWARD);
dftSetupB = vDSP_DFT_zrop_CreateSetup(nil, fftSize, vDSP_DFT_INVERSE);
if(!dftSetupF || !dftSetupB) {
free(deinterleavedImpulseBuffer);
return nil;
}
paddedSignal = (float *)_memalign_malloc(sizeof(float) * paddedBufferSize, 16);
if(!paddedSignal) {
free(deinterleavedImpulseBuffer);
return nil;
}
signal_fft.realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
signal_fft.imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
if(!signal_fft.realp || !signal_fft.imagp) {
free(deinterleavedImpulseBuffer);
return nil;
}
input_filtered_signal_per_channel[0].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
input_filtered_signal_per_channel[0].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
if(!input_filtered_signal_per_channel[0].realp ||
!input_filtered_signal_per_channel[0].imagp) {
free(deinterleavedImpulseBuffer);
return nil;
}
input_filtered_signal_per_channel[1].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
input_filtered_signal_per_channel[1].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
if(!input_filtered_signal_per_channel[1].realp ||
!input_filtered_signal_per_channel[1].imagp) {
free(deinterleavedImpulseBuffer);
return nil;
}
input_filtered_signal_totals[0].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
input_filtered_signal_totals[0].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
if(!input_filtered_signal_totals[0].realp ||
!input_filtered_signal_totals[0].imagp) {
free(deinterleavedImpulseBuffer);
return nil;
}
input_filtered_signal_totals[1].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
input_filtered_signal_totals[1].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
if(!input_filtered_signal_totals[1].realp ||
!input_filtered_signal_totals[1].imagp) {
free(deinterleavedImpulseBuffer);
return nil;
}
impulse_responses = (DSPSplitComplex *)calloc(sizeof(DSPSplitComplex), channels * 2);
if(!impulse_responses) {
free(deinterleavedImpulseBuffer);
return nil;
}
for(int i = 0; i < channels; ++i) {
impulse_responses[i * 2 + 0].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
impulse_responses[i * 2 + 0].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
impulse_responses[i * 2 + 1].realp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
impulse_responses[i * 2 + 1].imagp = (float *)_memalign_malloc(sizeof(float) * fftSizeOver2Plus1, 16);
if(!impulse_responses[i * 2 + 0].realp || !impulse_responses[i * 2 + 0].imagp ||
!impulse_responses[i * 2 + 1].realp || !impulse_responses[i * 2 + 1].imagp) {
free(deinterleavedImpulseBuffer);
return nil;
}
vDSP_ctoz((DSPComplex *)(deinterleavedImpulseBuffer + i * fftSize * 2), 2, &impulse_responses[i * 2 + 0], 1, fftSizeOver2);
vDSP_ctoz((DSPComplex *)(deinterleavedImpulseBuffer + i * fftSize * 2 + fftSize), 2, &impulse_responses[i * 2 + 1], 1, fftSizeOver2);
vDSP_DFT_Execute(dftSetupF, impulse_responses[i * 2 + 0].realp, impulse_responses[i * 2 + 0].imagp, impulse_responses[i * 2 + 0].realp, impulse_responses[i * 2 + 0].imagp);
vDSP_DFT_Execute(dftSetupF, impulse_responses[i * 2 + 1].realp, impulse_responses[i * 2 + 1].imagp, impulse_responses[i * 2 + 1].realp, impulse_responses[i * 2 + 1].imagp);
}
free(deinterleavedImpulseBuffer);
left_result = (float *)_memalign_malloc(sizeof(float) * fftSize, 16);
right_result = (float *)_memalign_malloc(sizeof(float) * fftSize, 16);
if(!left_result || !right_result)
return nil;
prevInputs = (float **)calloc(channels, sizeof(float *));
if(!prevInputs)
return nil;
for(int i = 0; i < channels; ++i) {
prevInputs[i] = (float *)_memalign_malloc(sizeof(float) * fftSize, 16);
if(!prevInputs[i])
return nil;
vDSP_vclr(prevInputs[i], 1, fftSize);
}
}
return self;
}
- (void)dealloc {
if(dftSetupF) vDSP_DFT_DestroySetup(dftSetupF);
if(dftSetupB) vDSP_DFT_DestroySetup(dftSetupB);
free(paddedSignal);
free(signal_fft.realp);
free(signal_fft.imagp);
free(input_filtered_signal_per_channel[0].realp);
free(input_filtered_signal_per_channel[0].imagp);
free(input_filtered_signal_per_channel[1].realp);
free(input_filtered_signal_per_channel[1].imagp);
free(input_filtered_signal_totals[0].realp);
free(input_filtered_signal_totals[0].imagp);
free(input_filtered_signal_totals[1].realp);
free(input_filtered_signal_totals[1].imagp);
if(impulse_responses) {
for(int i = 0; i < channelCount * 2; ++i) {
free(impulse_responses[i].realp);
free(impulse_responses[i].imagp);
}
free(impulse_responses);
}
free(left_result);
free(right_result);
if(prevInputs) {
for(int i = 0; i < channelCount; ++i) {
free(prevInputs[i]);
}
free(prevInputs);
}
}
- (void)process:(const float *)inBuffer sampleCount:(int)count toBuffer:(float *)outBuffer {
const float scale = 1.0 / (4.0 * (float)fftSize);
while(count > 0) {
const int countToDo = (count > bufferSize) ? bufferSize : count;
const int prevToDo = fftSize - countToDo;
vDSP_vclr(input_filtered_signal_totals[0].realp, 1, fftSizeOver2);
vDSP_vclr(input_filtered_signal_totals[0].imagp, 1, fftSizeOver2);
vDSP_vclr(input_filtered_signal_totals[1].realp, 1, fftSizeOver2);
vDSP_vclr(input_filtered_signal_totals[1].imagp, 1, fftSizeOver2);
for(int i = 0; i < channelCount; ++i) {
cblas_scopy((int)prevToDo, prevInputs[i] + countToDo, 1, paddedSignal, 1);
cblas_scopy((int)countToDo, inBuffer + i, (int)channelCount, paddedSignal + prevToDo, 1);
cblas_scopy((int)fftSize, paddedSignal, 1, prevInputs[i], 1);
vDSP_ctoz((DSPComplex *)paddedSignal, 2, &signal_fft, 1, fftSizeOver2);
vDSP_DFT_Execute(dftSetupF, signal_fft.realp, signal_fft.imagp, signal_fft.realp, signal_fft.imagp);
// One channel forward, then multiply and back twice
float preserveIRNyq = impulse_responses[i * 2 + 0].imagp[0];
float preserveSigNyq = signal_fft.imagp[0];
impulse_responses[i * 2 + 0].imagp[0] = 0;
signal_fft.imagp[0] = 0;
vDSP_zvmul(&signal_fft, 1, &impulse_responses[i * 2 + 0], 1, &input_filtered_signal_per_channel[0], 1, fftSizeOver2, 1);
input_filtered_signal_per_channel[0].imagp[0] = preserveIRNyq * preserveSigNyq;
impulse_responses[i * 2 + 0].imagp[0] = preserveIRNyq;
preserveIRNyq = impulse_responses[i * 2 + 1].imagp[0];
impulse_responses[i * 2 + 1].imagp[0] = 0;
vDSP_zvmul(&signal_fft, 1, &impulse_responses[i * 2 + 1], 1, &input_filtered_signal_per_channel[1], 1, fftSizeOver2, 1);
input_filtered_signal_per_channel[1].imagp[0] = preserveIRNyq * preserveSigNyq;
impulse_responses[i * 2 + 1].imagp[0] = preserveIRNyq;
vDSP_zvadd(&input_filtered_signal_totals[0], 1, &input_filtered_signal_per_channel[0], 1, &input_filtered_signal_totals[0], 1, fftSizeOver2);
vDSP_zvadd(&input_filtered_signal_totals[1], 1, &input_filtered_signal_per_channel[1], 1, &input_filtered_signal_totals[1], 1, fftSizeOver2);
}
vDSP_DFT_Execute(dftSetupB, input_filtered_signal_totals[0].realp, input_filtered_signal_totals[0].imagp, input_filtered_signal_totals[0].realp, input_filtered_signal_totals[0].imagp);
vDSP_DFT_Execute(dftSetupB, input_filtered_signal_totals[1].realp, input_filtered_signal_totals[1].imagp, input_filtered_signal_totals[1].realp, input_filtered_signal_totals[1].imagp);
vDSP_ztoc(&input_filtered_signal_totals[0], 1, (DSPComplex *)left_result, 2, fftSizeOver2);
vDSP_ztoc(&input_filtered_signal_totals[1], 1, (DSPComplex *)right_result, 2, fftSizeOver2);
float *left_ptr = left_result + prevToDo;
float *right_ptr = right_result + prevToDo;
vDSP_vsmul(left_ptr, 1, &scale, left_ptr, 1, countToDo);
vDSP_vsmul(right_ptr, 1, &scale, right_ptr, 1, countToDo);
cblas_scopy((int)countToDo, left_ptr, 1, outBuffer + 0, 2);
cblas_scopy((int)countToDo, right_ptr, 1, outBuffer + 1, 2);
inBuffer += countToDo * channelCount;
outBuffer += countToDo * 2;
count -= countToDo;
}
}
- (void)reset {
for(int i = 0; i < channelCount; ++i) {
vDSP_vclr(prevInputs[i], 1, fftSize);
}
}
@end