Significantly reduce stack memory usage

Oops, there were a lot of large local buffers in use here.

Signed-off-by: Christopher Snowhill <kode54@gmail.com>
xcode15
Christopher Snowhill 2022-07-14 17:28:24 -07:00
parent 193af27e7e
commit 838c0d08e8
5 changed files with 26 additions and 20 deletions

View File

@ -45,6 +45,9 @@ NS_ASSUME_NONNULL_BEGIN
uint32_t inputChannelConfig;
BOOL inputLossless;
uint8_t *tempData;
size_t tempDataSize;
}
@property(readonly) double listDuration;

View File

@ -408,6 +408,9 @@ static void convert_be_to_le(uint8_t *buffer, size_t bitsPerSample, size_t bytes
dsd2pcm = NULL;
}
#endif
if(tempData) {
free(tempData);
}
}
- (void)reset {
@ -572,22 +575,24 @@ static void convert_be_to_le(uint8_t *buffer, size_t bitsPerSample, size_t bytes
NSData *inputData = [inChunk removeSamples:samplesRead];
#if DSD_DECIMATE
const size_t sizeFactor = 2;
const size_t sizeFactor = 3;
#else
const size_t sizeFactor = (bitsPerSample == 1) ? 9 : 2;
const size_t sizeFactor = (bitsPerSample == 1) ? 9 : 3;
#endif
uint8_t tempData[samplesRead * floatFormat.mBytesPerPacket * sizeFactor + 32]; // Either two buffers plus padding, and/or double precision in case of endian flip
size_t newSize = samplesRead * floatFormat.mBytesPerPacket * sizeFactor + 64;
if(!tempData || tempDataSize < newSize)
tempData = realloc(tempData, tempDataSize = newSize); // Either two buffers plus padding, and/or double precision in case of endian flip
// double buffer system, with alignment
const size_t buffer_adder_base = (samplesRead * floatFormat.mBytesPerPacket + 31) & ~31;
NSUInteger bytesReadFromInput = samplesRead * inputFormat.mBytesPerPacket;
uint8_t *inputBuffer = (uint8_t *)[inputData bytes];
BOOL inputChanged = NO;
BOOL hdcdSustained = NO;
if(bytesReadFromInput && isBigEndian) {
// Time for endian swap!
memcpy(&tempData[0], [inputData bytes], bytesReadFromInput);
@ -598,10 +603,11 @@ static void convert_be_to_le(uint8_t *buffer, size_t bitsPerSample, size_t bytes
if(bytesReadFromInput && isFloat && bitsPerSample == 64) {
// Time for precision loss from weird inputs
const size_t buffer_adder = (inputBuffer == &tempData[0]) ? buffer_adder_base * 2 : 0;
samplesRead = bytesReadFromInput / sizeof(double);
convert_f64_to_f32((float *)(&tempData[0]), (const double *)inputBuffer, samplesRead);
convert_f64_to_f32((float *)(&tempData[buffer_adder]), (const double *)inputBuffer, samplesRead);
bytesReadFromInput = samplesRead * sizeof(float);
inputBuffer = (uint8_t *)(&tempData[0]);
inputBuffer = &tempData[buffer_adder];
inputChanged = YES;
bitsPerSample = 32;
}

View File

@ -211,9 +211,7 @@
}
- (void)launchThread {
NSThread *thread = [[NSThread alloc] initWithTarget:self selector:@selector(threadEntry:) object:nil];
[thread setStackSize:1024 * 1024]; // Dammit, this new code makes the nodes overflow the stack size, so let's double the stack
[thread start];
[NSThread detachNewThreadSelector:@selector(threadEntry:) toTarget:self withObject:nil];
}
- (void)setPreviousNode:(id)p {

View File

@ -117,11 +117,15 @@ using std::atomic_long;
float *samplePtr;
float tempBuffer[512 * 32];
float r8bTempBuffer[4096 * 32];
float inputBuffer[4096 * 32]; // 4096 samples times maximum supported channel count
float fsurroundBuffer[4096 * 6];
float hrtfBuffer[4096 * 2];
float eqBuffer[4096 * 32];
float visAudio[4096];
float visTemp[8192];
#ifdef OUTPUT_LOG
FILE *_logFile;
#endif

View File

@ -63,8 +63,6 @@ static OSStatus eqRenderCallback(void *inRefCon, AudioUnitRenderActionFlags *ioA
- (int)renderInput:(int)amountToRead toBuffer:(float *)buffer {
int amountRead = 0;
float visAudio[amountToRead]; // Chunk size
if(stopping == YES || [outputController shouldContinue] == NO) {
// Chain is dead, fill out the serial number pointer forever with silence
stopping = YES;
@ -151,7 +149,6 @@ static OSStatus eqRenderCallback(void *inRefCon, AudioUnitRenderActionFlags *ioA
[visController postSampleRate:44100.0];
float visTemp[8192];
if(newFormat.mSampleRate != 44100.0) {
if(newFormat.mSampleRate != lastVisRate) {
if(r8bvis) {
@ -768,8 +765,6 @@ current_device_listener(AudioObjectID inObjectID, UInt32 inNumberAddresses, cons
if([self processEndOfStream]) break;
} while(inputRendered < 4096);
float tempBuffer[4096 * 32];
int samplesRenderedTotal = 0;
for(size_t i = 0; i < 2;) {
@ -781,7 +776,7 @@ current_device_listener(AudioObjectID inObjectID, UInt32 inNumberAddresses, cons
continue;
}
[currentPtsLock lock];
samplesRendered = r8bstate_flush(r8bold, &tempBuffer[0], 4096);
samplesRendered = r8bstate_flush(r8bold, &r8bTempBuffer[0], 4096);
[currentPtsLock unlock];
if(!samplesRendered) {
r8bstate_delete(r8bold);
@ -790,7 +785,7 @@ current_device_listener(AudioObjectID inObjectID, UInt32 inNumberAddresses, cons
++i;
continue;
}
samplePtr = &tempBuffer[0];
samplePtr = &r8bTempBuffer[0];
} else {
samplesRendered = inputRendered;
samplePtr = &inputBuffer[0];