Ring Buffer: Replace virtual buffers

Instead of allocating an individual virtual buffer for each ring buffer,
allocate one large _mm_malloc block at a time, then dole out chunks of
that block as the nodes need them. This should reduce memory contention
a little bit.

Signed-off-by: Christopher Snowhill <kode54@gmail.com>
CQTexperiment
Christopher Snowhill 2022-01-29 21:32:59 -08:00
parent 9e5a70c9ae
commit 476c88973b
1 changed files with 127 additions and 99 deletions

View File

@ -17,15 +17,43 @@
#import "VirtualRingBuffer.h" #import "VirtualRingBuffer.h"
#include <mach/mach.h> #import <Foundation/Foundation.h>
#include <mach/mach_error.h> #import <stdlib.h>
#import <mm_malloc.h>
#import "Logging.h" #import "Logging.h"
@implementation VirtualRingBuffer @interface block_chunk : NSObject {
void * blockPointer;
void * theBlock;
size_t blockSize;
}
static void *allocateVirtualBuffer(UInt32 bufferLength); @property void * blockPointer;
static void deallocateVirtualBuffer(void *buffer, UInt32 bufferLength); @property void * theBlock;
@property size_t blockSize;
@end
// block_chunk has no behaviour of its own: all three accessors are
// compiler-synthesized. The plain @synthesize form (no "= ivar") binds each
// property to the instance variable with the same, un-prefixed name.
@implementation block_chunk

@synthesize blockPointer, theBlock, blockSize;

@end
/// Shared pool object that hands out raw memory chunks and takes them back.
@interface VirtualBufferHolder : NSObject {
    // Chunks currently available to be handed out.
    NSMutableArray *blocks;
    // Chunks that have been handed out and not yet returned.
    NSMutableArray *blocksUsed;
    // Per-backing-allocation use counts, keyed by the allocation's address
    // wrapped in an NSNumber.
    NSMutableDictionary *blockRefCounts;
}

/// Returns the single shared holder, creating it on first use.
+ (VirtualBufferHolder *)sharedInstance;

/// Hands out a chunk whose size equals `size` exactly.
/// @param size Requested chunk size in bytes.
/// @return Pointer to the chunk, or NULL when no chunk of that exact size
///         can be provided or the backing allocation fails.
- (void *)allocateBlock:(size_t)size;

/// Returns a chunk previously obtained from -allocateBlock: to the pool.
/// @param block Pointer that -allocateBlock: returned; pointers that are not
///              currently handed out are ignored.
- (void)freeBlock:(void *)block;

@end
@implementation VirtualRingBuffer
- (id)initWithLength:(UInt32)length - (id)initWithLength:(UInt32)length
@ -36,7 +64,7 @@ static void deallocateVirtualBuffer(void *buffer, UInt32 bufferLength);
// We need to allocate entire VM pages, so round the specified length up to the next page if necessary. // We need to allocate entire VM pages, so round the specified length up to the next page if necessary.
bufferLength = (UInt32) round_page(length); bufferLength = (UInt32) round_page(length);
buffer = allocateVirtualBuffer(bufferLength); buffer = [[VirtualBufferHolder sharedInstance] allocateBlock:bufferLength];
if (!buffer) if (!buffer)
{ {
self = nil; self = nil;
@ -55,7 +83,7 @@ static void deallocateVirtualBuffer(void *buffer, UInt32 bufferLength);
- (void)dealloc - (void)dealloc
{ {
if (buffer) if (buffer)
deallocateVirtualBuffer(buffer, bufferLength); [[VirtualBufferHolder sharedInstance] freeBlock:buffer];
} }
- (void)empty - (void)empty
@ -179,106 +207,106 @@ static void deallocateVirtualBuffer(void *buffer, UInt32 bufferLength);
@end @end
@implementation VirtualBufferHolder
void *allocateVirtualBuffer(UInt32 bufferLength) static VirtualBufferHolder * g_instance = nil;
+ (VirtualBufferHolder *) sharedInstance
{ {
kern_return_t error; @synchronized (g_instance) {
vm_address_t originalAddress = (vm_address_t)NULL; if (!g_instance) {
vm_address_t realAddress = (vm_address_t)NULL; g_instance = [[VirtualBufferHolder alloc] init];
mach_port_t memoryEntry; }
vm_size_t memoryEntryLength; return g_instance;
vm_address_t virtualAddress = (vm_address_t)NULL; }
}
// We want to find where we can get 2 * bufferLength bytes of contiguous address space. - (id) init {
// So let's just allocate that space, remember its address, and deallocate it. self = [super init];
// (This doesn't actually have to touch all of that memory so it's not terribly expensive.)
error = vm_allocate(mach_task_self(), &originalAddress, 2 * bufferLength, TRUE); if (self) {
if (error) { blocks = [[NSMutableArray alloc] init];
#if DEBUG blocksUsed = [[NSMutableArray alloc] init];
mach_error("vm_allocate initial chunk", error); blockRefCounts = [[NSMutableDictionary alloc] init];
#endif
return NULL;
} }
error = vm_deallocate(mach_task_self(), originalAddress, 2 * bufferLength); return self;
if (error) { }
#if DEBUG
mach_error("vm_deallocate initial chunk", error);
#endif
return NULL;
}
// Then allocate a "real" block of memory at the same address, but with the normal bufferLength. - (void *)allocateBlock:(size_t)size {
realAddress = originalAddress; @synchronized(blocks) {
error = vm_allocate(mach_task_self(), &realAddress, bufferLength, FALSE); tryagain:
if (error) { for (block_chunk * chunk in blocks) {
#if DEBUG if (chunk.blockSize == size) {
mach_error("vm_allocate real chunk", error); [blocksUsed addObject:chunk];
#endif [blocks removeObject:chunk];
return NULL; NSInteger refCount = [[blockRefCounts objectForKey:[NSNumber numberWithLongLong:(uintptr_t)chunk.theBlock]] integerValue];
} [blockRefCounts setObject:[NSNumber numberWithInteger:refCount + 1] forKey:[NSNumber numberWithLongLong:(uintptr_t)chunk.theBlock]];
if (realAddress != originalAddress) { return chunk.blockPointer;
DLog(@"allocateVirtualBuffer: vm_allocate 2nd time didn't return same address (%p vs %p)", (void *) originalAddress, (void *) realAddress); }
goto errorReturn; }
} if (![blocks count]) {
void * theBlock = _mm_malloc(32 * 1024 * 1024, 1024);
if (!theBlock) return NULL;
// Then make a memory entry for the area we just allocated. @synchronized (blocks) {
memoryEntryLength = bufferLength; block_chunk * chunk = [[block_chunk alloc] init];
error = mach_make_memory_entry(mach_task_self(), &memoryEntryLength, realAddress, VM_PROT_READ | VM_PROT_WRITE, &memoryEntry, (vm_address_t)NULL);
if (error) {
#if DEBUG
mach_error("mach_make_memory_entry", error);
#endif
goto errorReturn;
}
if (!memoryEntry) {
DLog(@"mach_make_memory_entry: returned memoryEntry of NULL");
goto errorReturn;
}
if (memoryEntryLength != bufferLength) {
DLog(@"mach_make_memory_entry: size changed (from %0x to %0lx)", bufferLength, memoryEntryLength);
goto errorReturn;
}
// And map the area immediately after the first block, with length bufferLength, to that memory entry. chunk.theBlock = theBlock;
virtualAddress = realAddress + bufferLength; chunk.blockPointer = theBlock;
error = vm_map(mach_task_self(), &virtualAddress, bufferLength, 0, FALSE, memoryEntry, 0, FALSE, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_DEFAULT); chunk.blockSize = 4 * 1024 * 1024;
if (error) {
#if DEBUG
mach_error("vm_map", error);
#endif
// TODO Retry from the beginning, instead of failing completely. There is a tiny (but > 0) probability that someone
// will allocate this space out from under us.
virtualAddress = (vm_address_t)NULL;
goto errorReturn;
}
if (virtualAddress != realAddress + bufferLength) {
DLog(@"vm_map: didn't return correct address (%p vs %p)", (void *) realAddress + bufferLength, (void *) virtualAddress);
goto errorReturn;
}
// Success! [blocks addObject:chunk];
return (void *)realAddress;
errorReturn: chunk = [[block_chunk alloc] init];
if (realAddress)
vm_deallocate(mach_task_self(), realAddress, bufferLength); chunk.theBlock = theBlock;
if (virtualAddress) chunk.blockPointer = theBlock + 4 * 1024 * 1024;
vm_deallocate(mach_task_self(), virtualAddress, bufferLength); chunk.blockSize = 4 * 1024 * 1024;
[blocks addObject:chunk];
for (size_t i = 8 * 1024 * 1024; i < 32 * 1024 * 1024; i += 1024 * 1024) {
chunk = [[block_chunk alloc] init];
chunk.theBlock = theBlock;
chunk.blockPointer = theBlock + i;
chunk.blockSize = 1024 * 1024;
[blocks addObject:chunk];
}
}
goto tryagain;
}
}
return NULL; return NULL;
} }
void deallocateVirtualBuffer(void *buffer, UInt32 bufferLength) - (void) freeBlock:(void *)block {
{ @synchronized(blocks) {
kern_return_t error; for (block_chunk * chunk in blocksUsed) {
if (chunk.blockPointer == block) {
// We can conveniently deallocate both the vm_allocated memory and [blocks addObject:chunk];
// the vm_mapped region at the same time. [blocksUsed removeObject:chunk];
error = vm_deallocate(mach_task_self(), (vm_address_t)buffer, bufferLength * 2); NSInteger refCount = [[blockRefCounts objectForKey:[NSNumber numberWithLongLong:(uintptr_t)chunk.theBlock]] integerValue];
if (error) { if (refCount <= 1) {
#if DEBUG [blockRefCounts removeObjectForKey:[NSNumber numberWithLongLong:(uintptr_t)chunk.theBlock]];
mach_error("vm_deallocate in dealloc", error); NSArray * blocksCopy = [blocks copy];
#endif for (block_chunk * removeChunk in blocksCopy) {
if (removeChunk.theBlock == chunk.theBlock) {
[blocks removeObject:removeChunk];
}
}
_mm_free(chunk.theBlock);
}
else {
[blockRefCounts setObject:[NSNumber numberWithInteger:refCount - 1] forKey:[NSNumber numberWithLongLong:(uintptr_t)chunk.theBlock]];
}
return;
}
}
} }
} }
@end