Cog Audio / WavPack input: Add DSD decimation to converter, and change WavPack input to emit only raw DSD

2022-01-14 06:12:14 -08:00 · 2022-01-14 06:12:14 -08:00 · 748891f285
parent 996bdec4be
commit 748891f285
3 changed files with 336 additions and 90 deletions
--- a/Audio/Chain/ConverterNode.h
+++ b/Audio/Chain/ConverterNode.h
@ -51,6 +51,10 @@
    
    void *extrapolateBuffer;
    size_t extrapolateBufferSize;
+    
+    void **dsd2pcm;
+    size_t dsd2pcmCount;
+    int dsd2pcmLatency;
 	
 	AudioStreamBasicDescription inputFormat;
    AudioStreamBasicDescription floatFormat;
--- a/Audio/Chain/ConverterNode.m
+++ b/Audio/Chain/ConverterNode.m
@ -65,6 +65,9 @@ void PrintStreamDesc (AudioStreamBasicDescription *inDesc)
        
        extrapolateBuffer = NULL;
        extrapolateBufferSize = 0;
+        
+        dsd2pcm = NULL;
+        dsd2pcmCount = 0;

        [[NSUserDefaultsController sharedUserDefaultsController] addObserver:self forKeyPath:@"values.volumeScaling"		options:0 context:nil];
        [[NSUserDefaultsController sharedUserDefaultsController] addObserver:self forKeyPath:@"values.outputResampling" options:0 context:nil];
@ -256,6 +259,248 @@ void scale_by_volume(float * buffer, size_t count, float volume)
            buffer[i] *= volume;
 }

+/**
+ * DSD 2 PCM: Stage 1:
+ * Decimate by factor 8
+ * (one byte (8 samples) -> one float sample)
+ * The bits are processed from least signicifant to most signicicant.
+ * @author Sebastian Gesemann
+ */
+
+#define dsd2pcm_FILTER_COEFFS_COUNT 64
+static const float dsd2pcm_FILTER_COEFFS[64] =
+{
+    0.09712411121659f, 0.09613438994044f, 0.09417884216316f, 0.09130441727307f,
+    0.08757947648990f, 0.08309142055179f, 0.07794369263673f, 0.07225228745463f,
+    0.06614191680338f, 0.05974199351302f, 0.05318259916599f, 0.04659059631228f,
+    0.04008603356890f, 0.03377897290478f, 0.02776684382775f, 0.02213240062966f,
+    0.01694232798846f, 0.01224650881275f, 0.00807793792573f, 0.00445323755944f,
+    0.00137370697215f,-0.00117318019994f,-0.00321193033831f,-0.00477694265140f,
+    -0.00591028841335f,-0.00665946056286f,-0.00707518873201f,-0.00720940203988f,
+    -0.00711340642819f,-0.00683632603227f,-0.00642384017266f,-0.00591723006715f,
+    -0.00535273320457f,-0.00476118922548f,-0.00416794965654f,-0.00359301524813f,
+    -0.00305135909510f,-0.00255339111833f,-0.00210551956895f,-0.00171076760278f,
+    -0.00136940723130f,-0.00107957856005f,-0.00083786862365f,-0.00063983084245f,
+    -0.00048043272086f,-0.00035442550015f,-0.00025663481039f,-0.00018217573430f,
+    -0.00012659899635f,-0.00008597726991f,-0.00005694188820f,-0.00003668060332f,
+    -0.00002290670286f,-0.00001380895679f,-0.00000799057558f,-0.00000440385083f,
+    -0.00000228567089f,-0.00000109760778f,-0.00000047286430f,-0.00000017129652f,
+    -0.00000004282776f, 0.00000000119422f, 0.00000000949179f, 0.00000000747450f
+};
+
+struct dsd2pcm_state {
+    /*
+    * This is the 2nd half of an even order symmetric FIR
+    * lowpass filter (to be used on a signal sampled at 44100*64 Hz)
+    * Passband is 0-24 kHz (ripples +/- 0.025 dB)
+    * Stopband starts at 176.4 kHz (rejection: 170 dB)
+    * The overall gain is 2.0
+    */
+
+    /* These remain constant for the duration */
+    int FILT_LOOKUP_PARTS;
+    float * FILT_LOOKUP_TABLE;
+    uint8_t * REVERSE_BITS;
+    int FIFO_LENGTH;
+    int FIFO_OFS_MASK;
+
+    /* These are altered */
+    int * fifo;
+    int fpos;
+};
+
+static void dsd2pcm_free(void *);
+static void dsd2pcm_reset(void *);
+
+static void * dsd2pcm_alloc()
+{
+    struct dsd2pcm_state * state = (struct dsd2pcm_state *) calloc(1, sizeof(struct dsd2pcm_state));
+    
+    if (!state)
+        return NULL;
+    
+    state->FILT_LOOKUP_PARTS = ( dsd2pcm_FILTER_COEFFS_COUNT + 7 ) / 8;
+    const int FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS;
+    // The current 128 tap FIR leads to an 8 KB lookup table
+    state->FILT_LOOKUP_TABLE = (float*) calloc(sizeof(float), FILT_LOOKUP_PARTS << 8);
+    if (!state->FILT_LOOKUP_TABLE)
+        goto fail;
+    float* FILT_LOOKUP_TABLE = state->FILT_LOOKUP_TABLE;
+    double * temp = (double*) calloc(sizeof(double), 0x100);
+    if (!temp)
+        goto fail;
+    for ( int part=0, sofs=0, dofs=0; part < FILT_LOOKUP_PARTS; )
+    {
+        memset( temp, 0, 0x100 * sizeof( double ) );
+        for ( int bit=0, bitmask=0x80; bit<8 && sofs+bit < dsd2pcm_FILTER_COEFFS_COUNT; )
+        {
+            double coeff = dsd2pcm_FILTER_COEFFS[ sofs + bit ];
+            for ( int bite=0; bite < 0x100; bite++ )
+            {
+                if ( ( bite & bitmask ) == 0 )
+                {
+                    temp[ bite ] -= coeff;
+                } else {
+                    temp[ bite ] += coeff;
+                }
+            }
+            bit++;
+            bitmask >>= 1;
+        }
+        for ( int s = 0; s < 0x100; ) {
+            FILT_LOOKUP_TABLE[dofs++] = (float) temp[s++];
+        }
+        part++;
+        sofs += 8;
+    }
+    free(temp);
+    { // calculate FIFO stuff
+        int k = 1;
+        while (k<FILT_LOOKUP_PARTS*2) k<<=1;
+        state->FIFO_LENGTH = k;
+        state->FIFO_OFS_MASK = k-1;
+    }
+    state->REVERSE_BITS = (uint8_t*) calloc(1, 0x100);
+    if (!state->REVERSE_BITS)
+        goto fail;
+    uint8_t* REVERSE_BITS = state->REVERSE_BITS;
+    for (int i=0, j=0; i<0x100; i++) {
+        REVERSE_BITS[i] = ( uint8_t ) j;
+        // "reverse-increment" of j
+        for (int bitmask=0x80;;) {
+            if (((j^=bitmask) & bitmask)!=0) break;
+            if (bitmask==1) break;
+            bitmask >>= 1;
+        }
+    }
+
+    state->fifo = (int*) calloc(sizeof(int), state->FIFO_LENGTH);
+    if (!state->fifo)
+        goto fail;
+
+    dsd2pcm_reset(state);
+    
+    return (void*) state;
+    
+fail:
+    dsd2pcm_free(state);
+    return NULL;
+}
+
+static void * dsd2pcm_dup(void * _state)
+{
+    struct dsd2pcm_state * state = (struct dsd2pcm_state *) _state;
+    if (state)
+    {
+        struct dsd2pcm_state * newstate = (struct dsd2pcm_state *) calloc(1, sizeof(struct dsd2pcm_state));
+        if (newstate) {
+            newstate->FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS;
+            newstate->FIFO_LENGTH = state->FIFO_LENGTH;
+            newstate->FIFO_OFS_MASK = state->FIFO_OFS_MASK;
+            newstate->fpos = state->fpos;
+            
+            newstate->FILT_LOOKUP_TABLE = (float*) calloc(sizeof(float), state->FILT_LOOKUP_PARTS << 8);
+            if (!newstate->FILT_LOOKUP_TABLE)
+                goto fail;
+            
+            memcpy(newstate->FILT_LOOKUP_TABLE, state->FILT_LOOKUP_TABLE, sizeof(float) * (state->FILT_LOOKUP_PARTS << 8));
+            
+            newstate->REVERSE_BITS = (uint8_t*) calloc(1, 0x100);
+            if (!newstate->REVERSE_BITS)
+                goto fail;
+            
+            memcpy(newstate->REVERSE_BITS, state->REVERSE_BITS, 0x100);
+            
+            newstate->fifo = (int*) calloc(sizeof(int), state->FIFO_LENGTH);
+            if (!newstate->fifo)
+                goto fail;
+            
+            memcpy(newstate->fifo, state->fifo, sizeof(int) * state->FIFO_LENGTH);
+
+            return (void*) newstate;
+        }
+        
+    fail:
+        dsd2pcm_free(newstate);
+        return NULL;
+    }
+    
+    return NULL;
+}
+
+static void dsd2pcm_free(void * _state)
+{
+    struct dsd2pcm_state * state = (struct dsd2pcm_state *) _state;
+    if (state)
+    {
+        free(state->fifo);
+        free(state->REVERSE_BITS);
+        free(state->FILT_LOOKUP_TABLE);
+        free(state);
+    }
+}
+
+static void dsd2pcm_reset(void * _state)
+{
+    struct dsd2pcm_state * state = (struct dsd2pcm_state *) _state;
+    const int FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS;
+    int* fifo = state->fifo;
+    for (int i=0; i<FILT_LOOKUP_PARTS; i++) {
+        fifo[i] = 0x55;
+        fifo[i+FILT_LOOKUP_PARTS] = 0xAA;
+    }
+    state->fpos = FILT_LOOKUP_PARTS;
+}
+
+static void dsd2pcm_process(void * _state, uint8_t * src, size_t sofs, size_t sinc, float * dest, size_t dofs, size_t dinc, size_t len)
+{
+    struct dsd2pcm_state * state = (struct dsd2pcm_state *) _state;
+    int bite1, bite2, temp;
+    float sample;
+    int* fifo = state->fifo;
+    const uint8_t* REVERSE_BITS = state->REVERSE_BITS;
+    const float* FILT_LOOKUP_TABLE = state->FILT_LOOKUP_TABLE;
+    const int FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS;
+    const int FIFO_OFS_MASK = state->FIFO_OFS_MASK;
+    int fpos = state->fpos;
+    while ( len > 0 )
+    {
+        fifo[ fpos ] = REVERSE_BITS[ fifo[ fpos ] ] & 0xFF;
+        fifo[ ( fpos + FILT_LOOKUP_PARTS ) & FIFO_OFS_MASK ] = src[ sofs ] & 0xFF;
+        sofs += sinc;
+        temp = ( fpos + 1 ) & FIFO_OFS_MASK;
+        sample = 0;
+        for ( int k=0, lofs=0; k < FILT_LOOKUP_PARTS; )
+        {
+            bite1 = fifo[ ( fpos - k ) & FIFO_OFS_MASK ];
+            bite2 = fifo[ ( temp + k ) & FIFO_OFS_MASK ];
+            sample += FILT_LOOKUP_TABLE[ lofs + bite1 ] + FILT_LOOKUP_TABLE[ lofs + bite2 ];
+            k++;
+            lofs += 0x100;
+        }
+        fpos = temp;
+        dest[ dofs ] = sample;
+        dofs += dinc;
+        len--;
+    }
+    state->fpos = fpos;
+}
+
+static int dsd2pcm_latency(void * _state)
+{
+    struct dsd2pcm_state * state = (struct dsd2pcm_state *) _state;
+    if (state) return state->FIFO_LENGTH;
+    else return 0;
+}
+
+static void convert_dsd_to_f32(float *output, uint8_t *input, size_t count, size_t channels, void ** dsd2pcm)
+{
+    for (size_t channel = 0; channel < channels; ++channel)
+    {
+        dsd2pcm_process(dsd2pcm[channel], input, channel, channels, output, channel, channels, count);
+    }
+}
+
 static void convert_u8_to_s16(int16_t *output, uint8_t *input, size_t count)
 {
    for (size_t i = 0; i < count; ++i)
@ -457,6 +702,7 @@ static void extrapolate(float *buffer, size_t channels, size_t frameSize, size_t
    int amountRead = 0;
    int extrapolateStart = 0;
    int extrapolateEnd = 0;
+    int eatFromEnd = 0;
    size_t amountToSkip = 0;

    if (stopping)
@ -477,6 +723,11 @@ tryagain:
    while (inpOffset == inpSize) {
        size_t samplesRead = 0;
        
+        BOOL isFloat = !!(inputFormat.mFormatFlags & kAudioFormatFlagIsFloat);
+        BOOL isUnsigned = !isFloat && !(inputFormat.mFormatFlags & kAudioFormatFlagIsSignedInteger);
+
+        uint8_t fillByte = (dsd2pcm) ? 0x55 : ((isUnsigned) ? 0x80 : 0x00);
+        
        // Approximately the most we want on input
        ioNumberPackets = (amount - amountRead) / outputFormat.mBytesPerPacket;
        
@ -490,97 +741,36 @@ tryagain:
        if (!inputBuffer || inputBufferSize < newSize)
            inputBuffer = realloc( inputBuffer, inputBufferSize = newSize * 3 );
        
-        // Pad end of track with floats. For simplicity, pad start in track
-        // native format.
-        
-        if (stopping || [self shouldContinue] == NO || [self endOfStream] == YES)
-        {
-            if (!skipResampler && !latencyPostfill)
-            {
-                ioNumberPackets = (int)resampler->latency(resampler_data);
-                newSize = ioNumberPackets * floatFormat.mBytesPerPacket;
-                newSize += inpSize;
-                if (!inputBuffer || inputBufferSize < newSize)
-                    inputBuffer = realloc( inputBuffer, inputBufferSize = newSize * 3);
-                
-                extrapolateEnd = ioNumberPackets;
-                
-                // Extrapolate end samples
-                if ( inpSize < sizeof(floatConvertedLast))
-                {
-                    size_t inpTotal = newSize + floatConvertedSize;
-                    if (inpTotal > sizeof(floatConvertedLast))
-                        inpTotal = sizeof(floatConvertedLast);
-                    
-                    if (inpTotal - newSize < floatConvertedSize)
-                    {
-                        memmove(floatConvertedLast, ((uint8_t*)floatConvertedLast) + newSize, inpTotal - newSize);
-                        floatConvertedSize = inpTotal - newSize;
-                    }
-                    
-                    memcpy(((uint8_t*)floatConvertedLast) + floatConvertedSize, inputBuffer, inpSize);
-                    
-                    extrapolate( floatConvertedLast, floatFormat.mChannelsPerFrame, inpTotal / floatFormat.mBytesPerPacket, extrapolateEnd, NO, &extrapolateBuffer, &extrapolateBufferSize );
-                    
-                    newSize = ioNumberPackets * floatFormat.mBytesPerPacket;
-                    
-                    memcpy( inputBuffer, ((uint8_t*)floatConvertedLast) + inpTotal - newSize, newSize );
-                    
-                    inpSize = newSize;
-                    inpOffset = 0;
-                }
-                else
-                {
-                    extrapolate( inputBuffer, floatFormat.mChannelsPerFrame, newSize / floatFormat.mBytesPerPacket, extrapolateEnd, NO, &extrapolateBuffer, &extrapolateBufferSize );
-                
-                    inpOffset = inpSize;
-                    inpSize = newSize;
-                }
-                latencyPostfill = YES;
-                break;
-            }
-            else
-            {
-                convertEntered = NO;
-                return amountRead;
-            }
-        }
-
-        size_t amountToWrite = ioNumberPackets * inputFormat.mBytesPerPacket;
+        ssize_t amountToWrite = ioNumberPackets * inputFormat.mBytesPerPacket;
        amountToSkip = 0;
        
-        BOOL isFloat = !!(inputFormat.mFormatFlags & kAudioFormatFlagIsFloat);
-        BOOL isUnsigned = !isFloat && !(inputFormat.mFormatFlags & kAudioFormatFlagIsSignedInteger);
-
        if (!skipResampler)
        {
            if (latencyStarted < 0)
            {
                latencyStarted = resampler->latency(resampler_data);
                extrapolateStart = (int)latencyStarted;
+                if (dsd2pcm) extrapolateStart += dsd2pcmLatency;
            }

            if (latencyStarted)
            {
-                size_t latencyToWrite = latencyStarted * inputFormat.mBytesPerPacket;
+                ssize_t latencyToWrite = latencyStarted * inputFormat.mBytesPerPacket;
                if (latencyToWrite > amountToWrite)
                    latencyToWrite = amountToWrite;
                
-                if (isUnsigned)
-                    memset(inputBuffer, 0x80, latencyToWrite);
-                else
-                    memset(inputBuffer, 0, latencyToWrite);
+                memset(inputBuffer, fillByte, latencyToWrite);

                amountToSkip = latencyToWrite;
                amountToWrite -= amountToSkip;

-                latencyEaten = latencyStarted * sampleRatio;
+                latencyEaten = extrapolateStart * sampleRatio;

                latencyStarted -= latencyToWrite / inputFormat.mBytesPerPacket;
            }
        }
        
-        size_t bytesReadFromInput = 0;
+        ssize_t bytesReadFromInput = 0;
        
        while (bytesReadFromInput < amountToWrite && !stopping && [self shouldContinue] == YES && [self endOfStream] == NO)
        {
@ -595,8 +785,40 @@ tryagain:
            }
        }
        
-        bytesReadFromInput += amountToSkip;
+        // Pad end of track with input format silence
        
+        if (stopping || [self shouldContinue] == NO || [self endOfStream] == YES)
+        {
+            if (!skipResampler && !latencyPostfill)
+            {
+                ioNumberPackets = (int)resampler->latency(resampler_data);
+                newSize = ioNumberPackets * inputFormat.mBytesPerPacket;
+                newSize += bytesReadFromInput;
+                if (!inputBuffer || inputBufferSize < newSize)
+                    inputBuffer = realloc( inputBuffer, inputBufferSize = newSize * 3);
+
+                latencyPostfill = ioNumberPackets;
+
+                memset(inputBuffer + bytesReadFromInput, fillByte, latencyPostfill * inputFormat.mBytesPerPacket);
+
+                bytesReadFromInput = newSize;
+
+                extrapolateEnd = ioNumberPackets;
+                if (dsd2pcm)
+                {
+                    extrapolateEnd += dsd2pcmLatency;
+                    eatFromEnd = dsd2pcmLatency * sampleRatio;
+                }
+            }
+        }
+        
+        if (!bytesReadFromInput) {
+            convertEntered = NO;
+            return amountRead;
+        }
+
+        bytesReadFromInput += amountToSkip;
+
        if (bytesReadFromInput &&
            (inputFormat.mFormatFlags & kAudioFormatFlagIsBigEndian))
        {
@ -616,7 +838,15 @@ tryagain:
        if (bytesReadFromInput && !isFloat)
        {
            size_t bitsPerSample = inputFormat.mBitsPerChannel;
-            if (bitsPerSample <= 8) {
+            if (bitsPerSample == 1) {
+                samplesRead = bytesReadFromInput / inputFormat.mChannelsPerFrame;
+                convert_dsd_to_f32(inputBuffer + bytesReadFromInput, inputBuffer, samplesRead, inputFormat.mChannelsPerFrame, dsd2pcm);
+                memmove(inputBuffer, inputBuffer + bytesReadFromInput, samplesRead * inputFormat.mChannelsPerFrame * sizeof(float));
+                bitsPerSample = 32;
+                bytesReadFromInput = samplesRead * inputFormat.mChannelsPerFrame * sizeof(float);
+                isFloat = YES;
+            }
+            else if (bitsPerSample <= 8) {
                samplesRead = bytesReadFromInput;
                if (!isUnsigned)
                    convert_s8_to_s16(inputBuffer + bytesReadFromInput, inputBuffer, samplesRead);
@ -668,6 +898,14 @@ tryagain:
            extrapolate( inputBuffer, floatFormat.mChannelsPerFrame, bytesReadFromInput / floatFormat.mBytesPerPacket, extrapolateStart, YES, &extrapolateBuffer, &extrapolateBufferSize);
            extrapolateStart = 0;
        }
+        else if (extrapolateEnd)
+        {
+            extrapolate( inputBuffer, floatFormat.mChannelsPerFrame, bytesReadFromInput / floatFormat.mBytesPerPacket, extrapolateEnd, NO, &extrapolateBuffer, &extrapolateBufferSize);
+            extrapolateEnd = 0;
+            bytesReadFromInput -= eatFromEnd * floatFormat.mBytesPerPacket;
+            if (bytesReadFromInput < 0)
+                bytesReadFromInput = 0;
+        }
        
        // Input now contains bytesReadFromInput worth of floats, in the input sample rate
        inpSize = bytesReadFromInput;
@ -860,6 +1098,19 @@ static float db_to_scale(float db)
        || (isFloat && !(inputFormat.mBitsPerChannel == 32 || inputFormat.mBitsPerChannel == 64)))
        return NO;
    
+    if (inputFormat.mBitsPerChannel == 1) {
+        // Decimate this for speed
+        inputFormat.mSampleRate *= 1.0 / 8.0;
+        dsd2pcmCount = inputFormat.mChannelsPerFrame;
+        dsd2pcm = calloc(dsd2pcmCount, sizeof(void*));
+        dsd2pcm[0] = dsd2pcm_alloc();
+        dsd2pcmLatency = dsd2pcm_latency(dsd2pcm[0]);
+        for (size_t i = 1; i < dsd2pcmCount; ++i)
+        {
+            dsd2pcm[i] = dsd2pcm_dup(dsd2pcm[0]);
+        }
+    }
+    
    // These are really placeholders, as we're doing everything internally now
    
    floatFormat = inputFormat;
@ -1006,6 +1257,16 @@ static float db_to_scale(float db)
        resampler = NULL;
        resampler_data = NULL;
    }
+    if (dsd2pcm && dsd2pcmCount)
+    {
+        for (size_t i = 0; i < dsd2pcmCount; ++i)
+        {
+            dsd2pcm_free(dsd2pcm[i]);
+            dsd2pcm[i] = NULL;
+        }
+        free(dsd2pcm);
+        dsd2pcm = NULL;
+    }
    if (extrapolateBuffer)
    {
        free(extrapolateBuffer);
--- a/Plugins/WavPack/WavPackDecoder.m
+++ b/Plugins/WavPack/WavPackDecoder.m
@ -225,31 +225,15 @@ int32_t WriteBytesProc(void *ds, void *data, int32_t bcount)
 	int16_t				*alias16;
 	int32_t				*alias32;
    
-    UInt32               trueFrames = frames;
-    
-    if (isDSD)           trueFrames = (frames / 8) & ~7;
-    
-    size_t newSize = trueFrames*sizeof(int32_t)*channels;
+    size_t newSize = frames*sizeof(int32_t)*channels;
    if (!inputBuffer || newSize > inputBufferSize) {
        inputBuffer = realloc(inputBuffer, inputBufferSize = newSize);
    }
 	
-	samplesRead	= WavpackUnpackSamples(wpc, inputBuffer, trueFrames);
+	samplesRead	= WavpackUnpackSamples(wpc, inputBuffer, frames);
 	
 	switch(bitsPerSample) {
        case 1:
-            alias8 = buf;
-            for (sample = 0; sample < samplesRead * channels;) {
-                for (int channel = 0; channel < channels; channel++) {
-                    for (int i = 0, mask = 0x80; i < 8; ++i, mask >>= 1) {
-                        alias8[i * channels] = (inputBuffer[sample] & mask) ? 127 : -128;
-                    }
-                    alias8 += 1;
-                    sample++;
-                }
-                alias8 += 7 * channels;
-            }
-            break;
 		case 8:
 			// No need for byte swapping
 			alias8 = buf;
@ -285,9 +269,6 @@ int32_t WriteBytesProc(void *ds, void *data, int32_t bcount)
 			ALog(@"Unsupported sample size: %d", bitsPerSample);
 	}
    
-    if (isDSD)
-        samplesRead *= 8;
-	
 	return samplesRead;
 }