Significantly reduce stack memory usage

Oops, there were a lot of large local buffers in use here.

Signed-off-by: Christopher Snowhill <kode54@gmail.com>
2022-07-14 17:28:24 -07:00 · 2022-07-14 17:28:24 -07:00 · 838c0d08e8
parent 193af27e7e
commit 838c0d08e8
5 changed files with 26 additions and 20 deletions
--- a/Audio/Chain/ChunkList.h
+++ b/Audio/Chain/ChunkList.h
@ -45,6 +45,9 @@ NS_ASSUME_NONNULL_BEGIN

 	uint32_t inputChannelConfig;
 	BOOL inputLossless;
+
+	uint8_t *tempData;
+	size_t tempDataSize;
 }

@property(readonly) double listDuration;
--- a/Audio/Chain/ChunkList.m
+++ b/Audio/Chain/ChunkList.m
@ -408,6 +408,9 @@ static void convert_be_to_le(uint8_t *buffer, size_t bitsPerSample, size_t bytes
 		dsd2pcm = NULL;
 	}
 #endif
+	if(tempData) {
+		free(tempData);
+	}
 }

 - (void)reset {
@ -572,11 +575,13 @@ static void convert_be_to_le(uint8_t *buffer, size_t bitsPerSample, size_t bytes
 	NSData *inputData = [inChunk removeSamples:samplesRead];

 #if DSD_DECIMATE
-	const size_t sizeFactor = 2;
+	const size_t sizeFactor = 3;
 #else
-	const size_t sizeFactor = (bitsPerSample == 1) ? 9 : 2;
+	const size_t sizeFactor = (bitsPerSample == 1) ? 9 : 3;
 #endif
-	uint8_t tempData[samplesRead * floatFormat.mBytesPerPacket * sizeFactor + 32]; // Either two buffers plus padding, and/or double precision in case of endian flip
+	size_t newSize = samplesRead * floatFormat.mBytesPerPacket * sizeFactor + 64;
+	if(!tempData || tempDataSize < newSize)
+		tempData = realloc(tempData, tempDataSize = newSize); // Either two buffers plus padding, and/or double precision in case of endian flip

 	// double buffer system, with alignment
 	const size_t buffer_adder_base = (samplesRead * floatFormat.mBytesPerPacket + 31) & ~31;
@ -598,10 +603,11 @@ static void convert_be_to_le(uint8_t *buffer, size_t bitsPerSample, size_t bytes

 	if(bytesReadFromInput && isFloat && bitsPerSample == 64) {
 		// Time for precision loss from weird inputs
+		const size_t buffer_adder = (inputBuffer == &tempData[0]) ? buffer_adder_base * 2 : 0;
 		samplesRead = bytesReadFromInput / sizeof(double);
-		convert_f64_to_f32((float *)(&tempData[0]), (const double *)inputBuffer, samplesRead);
+		convert_f64_to_f32((float *)(&tempData[buffer_adder]), (const double *)inputBuffer, samplesRead);
 		bytesReadFromInput = samplesRead * sizeof(float);
-		inputBuffer = (uint8_t *)(&tempData[0]);
+		inputBuffer = &tempData[buffer_adder];
 		inputChanged = YES;
 		bitsPerSample = 32;
 	}
--- a/Audio/Chain/Node.m
+++ b/Audio/Chain/Node.m
@ -211,9 +211,7 @@
 }

 - (void)launchThread {
-	NSThread *thread = [[NSThread alloc] initWithTarget:self selector:@selector(threadEntry:) object:nil];
-	[thread setStackSize:1024 * 1024]; // Dammit, this new code makes the nodes overflow the stack size, so let's double the stack
-	[thread start];
+	[NSThread detachNewThreadSelector:@selector(threadEntry:) toTarget:self withObject:nil];
 }

 - (void)setPreviousNode:(id)p {
--- a/Audio/Output/OutputAVFoundation.h
+++ b/Audio/Output/OutputAVFoundation.h
@ -117,11 +117,15 @@ using std::atomic_long;

 	float *samplePtr;
 	float tempBuffer[512 * 32];
+	float r8bTempBuffer[4096 * 32];
 	float inputBuffer[4096 * 32]; // 4096 samples times maximum supported channel count
 	float fsurroundBuffer[4096 * 6];
 	float hrtfBuffer[4096 * 2];
 	float eqBuffer[4096 * 32];

+	float visAudio[4096];
+	float visTemp[8192];
+
 #ifdef OUTPUT_LOG
 	FILE *_logFile;
 #endif
--- a/Audio/Output/OutputAVFoundation.m
+++ b/Audio/Output/OutputAVFoundation.m
@ -63,8 +63,6 @@ static OSStatus eqRenderCallback(void *inRefCon, AudioUnitRenderActionFlags *ioA
 - (int)renderInput:(int)amountToRead toBuffer:(float *)buffer {
 	int amountRead = 0;

-	float visAudio[amountToRead]; // Chunk size
-
 	if(stopping == YES || [outputController shouldContinue] == NO) {
 		// Chain is dead, fill out the serial number pointer forever with silence
 		stopping = YES;
@ -151,7 +149,6 @@ static OSStatus eqRenderCallback(void *inRefCon, AudioUnitRenderActionFlags *ioA

 		[visController postSampleRate:44100.0];

-		float visTemp[8192];
 		if(newFormat.mSampleRate != 44100.0) {
 			if(newFormat.mSampleRate != lastVisRate) {
 				if(r8bvis) {
@ -768,8 +765,6 @@ current_device_listener(AudioObjectID inObjectID, UInt32 inNumberAddresses, cons
 		if([self processEndOfStream]) break;
 	} while(inputRendered < 4096);

-	float tempBuffer[4096 * 32];
-
 	int samplesRenderedTotal = 0;

 	for(size_t i = 0; i < 2;) {
@ -781,7 +776,7 @@ current_device_listener(AudioObjectID inObjectID, UInt32 inNumberAddresses, cons
 				continue;
 			}
 			[currentPtsLock lock];
-			samplesRendered = r8bstate_flush(r8bold, &tempBuffer[0], 4096);
+			samplesRendered = r8bstate_flush(r8bold, &r8bTempBuffer[0], 4096);
 			[currentPtsLock unlock];
 			if(!samplesRendered) {
 				r8bstate_delete(r8bold);
@ -790,7 +785,7 @@ current_device_listener(AudioObjectID inObjectID, UInt32 inNumberAddresses, cons
 				++i;
 				continue;
 			}
-			samplePtr = &tempBuffer[0];
+			samplePtr = &r8bTempBuffer[0];
 		} else {
 			samplesRendered = inputRendered;
 			samplePtr = &inputBuffer[0];