cog/Frameworks/vgmstream/vgmstream/src/coding/ice_decoder_icelib.c

1434 lines
44 KiB
C

/* Decodes Inti Creates' BIGRP files with custom codecs, found in games using their current
* "Inti Creates Engine" ("ICE") / "Imperial Engine" ('IMP'?). Engine's name is said to be
* the latter (ICE being the earlier iteration) but debug info still shows the former.
* Reverse engineered from various exes (if you use this as a base credit it, please).
*
* This code tries to follow the original closely for documentation purposes, with some extra
* error control (original doesn't check zlib errors or buf sizes) plus a few extra structs/functions
* that were likely inline'd (such as bitreaders). */
//TODO change to streaming decoder
// Currently lib expects most data in memory. Due to how format is designed it's not the
// easiest thing to change; to be fixed later:
// - data is divided into 2 blocks (intro+body) that are decoded separately
// (streaming should read up to block max)
// - code data isn't divided into frames, just keeps reading from the file buf
// - "range" decoder has linear data, and should be easy enough to stream, but it's rarely used.
// - "dct" decoder has a big chunk (+30%) of codebook data at the beginning of each block, then
// code data *but* decoder reads simultaneously from both places. Files can be rather big
// (2 mins = 6mb, codebooks = ~1.5mb). Would need to pre-read all the codebooks (still big) then
// stream data, or seek around to codebooks (thrashes FILE buffers).
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "ice_decoder_icelib.h"
/* use miniz (API-compatible) to avoid adding external zlib just for this codec
* - https://github.com/richgel999/miniz */
#include "../util/miniz.h"
//#include "zlib.h"
#define ICESND_MAX_CHANNELS 2
/* ************************************************************ */
/* COMMON */
/* ************************************************************ */
/* Read one unsigned byte from the start of p. */
static inline uint8_t get_u8(const uint8_t* p) {
    return (uint8_t)p[0];
}
/* Read an unsigned 16-bit little-endian value from the first 2 bytes of p. */
static inline uint16_t get_u16le(const uint8_t* p) {
    const uint16_t lo = p[0];
    const uint16_t hi = p[1];

    return (uint16_t)(lo | (uint16_t)(hi << 8));
}
/* Read an unsigned 32-bit little-endian value from the first 4 bytes of p. */
static inline uint32_t get_u32le(const uint8_t* p) {
    uint32_t value = 0;
    int i;

    /* fold from the most significant (last) byte down to the first */
    for (i = 3; i >= 0; i--) {
        value = (value << 8) | p[i];
    }
    return value;
}
/* bigrp entry info as read from header */
/* Parsed fields of a single bigrp entry, filled by bigrp_entry_parse() from little-endian data.
 * Only the codec 00/03 (audio) layout is stored; codec 01/02 layouts are listed for reference.
 * Offsets in comments are relative to the entry start. */
typedef struct {
uint32_t hash1; /* 0x00: usually matches filename, different files vary on bytes, seems internally used to identify files */
uint32_t hash2; /* 0x04: group id? repeated in several files, doesn't seem used */
uint32_t codec; /* 0x08: 00: range, 01: metadata, 02: midi, 03: DCT */
/* rest of header varies per codec (padded until ~0x40 for all) */
/* codec 01 entry: */
/* - offset */
/* - size */
/* codec 02 entry: */
/* - config? (big value, same for all entries) */
/* - midi offset */
/* - midi size */
/* - midi config? (~0x30C0, possibly some size) */
/* - midi config? (~0x4036, then divided by 180.0) */
/* codec 00/03 entry */
int sample_rate; /* 0x0c: accepted range when parsing is 2000..48000 */
int channels; /* 0x10 (u8): 1 or 2 */
int spf; /* 0x11 (u8): always 16, internally used for pcm size calculations */
int unknown; /* 0x12 (u16): some kind of low-ish value, volume? (seen 0x40~0x7F) */
int loop_flag; /* 0x14: 0 or 1 */
int frame_codes; /* 0x18: 0x64 in codec 00, 0x00 in codec 03 (possibly "RangeBlockSize") */
/* has one "intro" block then one "body" block; intro block may be zero in full loops/no loops */
uint32_t intro_samples; /* 0x1c */
uint32_t intro_zsize; /* 0x20 */
uint32_t intro_offset; /* 0x24 */
uint32_t body_samples; /* 0x28 */
uint32_t body_zsize; /* 0x2c */
uint32_t body_offset; /* 0x30 */
/* rest: padding until entry_size */
} bigrp_entry_t;
/* base bigrp header and extra config */
/* Main bigrp header as read by bigrp_header_parse() (all fields little-endian). */
typedef struct {
uint32_t head_size; /* 0x00: size of this header, only 0x0c or 0x10 are accepted */
uint32_t entry_size; /* 0x04: size of each entry, only 0x34 or 0x40 are accepted */
int total_subsongs; /* 0x08: number of entries */
uint32_t dummy; /* 0x0c: only read when head_size >= 0x10 (otherwise set to 0), must be 0 */
} bigrp_header_t;
/* block/data format (handled later):
*
* codec 00 (range):
* - zlib block (32b deflated size + zlib data)
*
* codec 03 (dct)
* - codeinfo block (see codeinfo_parse)
* - zlib tables
* - data
*
* codec 01 (data?)
* - ?
* Not parsed as audio in the IcePlayer init. Usually paired with midis, has several sections and some
* point to entries using hash1, so probably soundfont config, but not all bigrp with midis have this.
*
* codec 02 (midi)
* - standard midi (MThd)
*/
/* OG code casts buffer to this struct, read in a more portable fashion */
/* Fill 'entry' from a raw bigrp entry, reading little-endian fields one by one (OG code casts
 * the buffer to a struct). Only audio codecs (range/DCT) are accepted.
 * Returns ICESND_RESULT_OK, or ICESND_ERROR_HEADER on short buffers, other codecs, or values
 * outside of what has been seen in files. */
static int bigrp_entry_parse(bigrp_entry_t* entry, const uint8_t* buf, int buf_size) {
    int is_audio;

    if (buf_size < 0x34)
        return ICESND_ERROR_HEADER;

    entry->hash1 = get_u32le(buf + 0x00);
    entry->hash2 = get_u32le(buf + 0x04);
    entry->codec = get_u32le(buf + 0x08); /* u8 in decoder, read fully to validate */

    /* data/midi entries (and unknown codecs) can't be decoded here */
    is_audio = (entry->codec == ICESND_CODEC_RANGE) || (entry->codec == ICESND_CODEC_DCT);
    if (!is_audio)
        return ICESND_ERROR_HEADER;

    entry->sample_rate   = get_u32le(buf + 0x0c);
    entry->channels      = get_u8   (buf + 0x10);
    entry->spf           = get_u8   (buf + 0x11);
    entry->unknown       = get_u16le(buf + 0x12);
    entry->loop_flag     = get_u32le(buf + 0x14); /* u8 in decoder, read fully to validate */
    entry->frame_codes   = get_u32le(buf + 0x18); /* codec 00 only (includes N channels) */
    entry->intro_samples = get_u32le(buf + 0x1c);
    entry->intro_zsize   = get_u32le(buf + 0x20);
    entry->intro_offset  = get_u32le(buf + 0x24);
    entry->body_samples  = get_u32le(buf + 0x28);
    entry->body_zsize    = get_u32le(buf + 0x2c);
    entry->body_offset   = get_u32le(buf + 0x30);

    /* sanity checks, just in case */
    if (entry->sample_rate < 2000 || entry->sample_rate > 48000)
        return ICESND_ERROR_HEADER;

    /* other values not seen */
    if (entry->channels < 1 || entry->channels > 2 || entry->spf != 16)
        return ICESND_ERROR_HEADER;

    if (entry->frame_codes != 0 && entry->frame_codes != 0x64)
        return ICESND_ERROR_HEADER;

    /* assumed that N codes = N/chs samples */
    if (entry->frame_codes % entry->channels != 0)
        return ICESND_ERROR_HEADER;

    if (entry->loop_flag != 0 && entry->loop_flag != 1)
        return ICESND_ERROR_HEADER;

    /* probably wouldn't matter, same with other sizes */
    if (entry->intro_samples == 0 && entry->body_samples == 0)
        return ICESND_ERROR_HEADER;

    /* stereo range files not seen */
    if (entry->channels > 1 && entry->codec == ICESND_CODEC_RANGE)
        return ICESND_ERROR_HEADER;

    return ICESND_RESULT_OK;
}
/* read main .bigrp header. Earlier games used standard Nintendo's BFGRP and BCGRP,
* and this format seems kind of inspired by it, so presumably BIGRP = Binary Inti Group */
/* Read and validate the main .bigrp header from buf, and check that 'subsong' (1-based) exists.
 * Returns ICESND_RESULT_OK, or ICESND_ERROR_HEADER if the buffer is too small or any value
 * isn't one of the known ones. */
static int bigrp_header_parse(bigrp_header_t* hdr, const uint8_t* buf, int buf_size, int subsong) {
    int head_known, entry_known;

    if (buf_size < 0x0c)
        return ICESND_ERROR_HEADER;

    /* base header */
    hdr->head_size      = get_u32le(buf + 0x00);
    hdr->entry_size     = get_u32le(buf + 0x04);
    hdr->total_subsongs = get_u32le(buf + 0x08);

    if (hdr->head_size > buf_size)
        return ICESND_ERROR_HEADER;

    /* extra field only exists in the bigger header */
    hdr->dummy = (hdr->head_size >= 0x10) ? get_u32le(buf + 0x0c) : 0x00;

    /* 0x0c: Bloodstained COTM (Vita/3DS), Mighty Gunvolt Burst (PC); 0x10: rest */
    head_known = (hdr->head_size == 0x0c) || (hdr->head_size == 0x10);
    if (!head_known)
        return ICESND_ERROR_HEADER;

    /* same format in both sizes (after 0x34 is padding) */
    entry_known = (hdr->entry_size == 0x34) || (hdr->entry_size == 0x40);
    if (!entry_known)
        return ICESND_ERROR_HEADER;

    if (hdr->dummy != 0x00)
        return ICESND_ERROR_HEADER;

    if (subsong < 1 || subsong > hdr->total_subsongs)
        return ICESND_ERROR_HEADER;

    return ICESND_RESULT_OK;
}
/* original also tries to call zlib's free if needed (not set though) */
/* Close the inflate stream if it was opened, and clear the "opened" flag (safe to call repeatedly). */
static void zlib_end(z_stream* strm, int* p_zlib_init) {
    if (!*p_zlib_init)
        return;

    inflateEnd(strm);
    /* original would also call zlib's free here if needed (not set though) */
    *p_zlib_init = 0;
}
/* (Re)start an inflate stream over a chunk made of a 32-bit decompressed size + zlib data.
 * Any previously opened stream in strm is closed first.
 *
 * strm: zlib stream to set up
 * p_zlib_init: flag tracking whether strm is currently open (updated here)
 * buf/buf_size: chunk start and size; the first 0x04 bytes are skipped
 *
 * Returns ICESND_RESULT_OK, or ICESND_ERROR_SETUP if inflateInit fails. */
static int zlib_init(z_stream* strm, int* p_zlib_init, const uint8_t* buf, int buf_size) {
    int err;

    zlib_end(strm, p_zlib_init);

    strm->zalloc = 0;
    strm->zfree = 0;
    strm->opaque = 0;
    strm->next_in = 0;
    strm->avail_in = 0;

    err = inflateInit(strm);
    if (err < 0) return ICESND_ERROR_SETUP;
    *p_zlib_init = 1;

    /* zlib data starts with the decompressed size */
    if (buf && buf_size >= 0x04) {
        strm->next_in = buf + 0x04;
        strm->avail_in = buf_size - 0x04;
    }
    else {
        /* chunk too small to hold the size field: 'buf_size - 0x04' would wrap into a huge
         * unsigned avail_in and let inflate read past the buffer, so expose an empty input
         * instead (setup still succeeds, inflate just won't output anything) */
        strm->next_in = buf;
        strm->avail_in = 0;
    }

    return ICESND_RESULT_OK;
}
/* ************************************************************ */
/* RANGE */
/* ************************************************************ */
/* Inti Creates's "range" decoder, internally IceSoundCodecDecoderRange class. Seems similar to Sony's
* "adaptive dynamic range coding" (ADRC) compression (not what's usually called "range coding").
*
* Data is zlibbed (though doesn't save much) then divided into VBR frames. Each frame has a 24-bit LE
* header that defines a "range" (min..max) and quantized codes' bits (often 6 or 7), then 100 codes.
* Unsigned codes just map to a relative value within the range, and final sample is range-min + range-value.
* Stereo alternates L-R header then L R L R ... samples
*
* For example, if a range = [0, 6000], and bits = 6:
* - code=0 > value=0 (min), code=63 > value=6000 (max), code=31 > value=2952 (in-between), ...
*/
#define RANGE_DECODE_BUFFER 0x800
/* State of the "range" decoder for the current block (set up via range_decoder_block_setup). */
typedef struct {
const uint8_t* inbuf; /* current block's data (not freed by the decoder) */
int inbuf_size;
int max_samples; /* samples to decode in this block */
int frame_codes; /* codes per frame, from the entry header */
int samples_done; /* samples decoded so far in this block */
uint32_t outbuf_pos; /* next byte to read within outbuf */
uint8_t outbuf[RANGE_DECODE_BUFFER]; /* inflated data, refilled by range_load_byte */
z_stream strm;
int codes_left; /* codes still to decode in the current frame */
int bitpos; /* within curr_byte */
int16_t range_min[ICESND_MAX_CHANNELS]; /* per-channel values from the last frame header */
int16_t range_max[ICESND_MAX_CHANNELS];
uint16_t range_bits[ICESND_MAX_CHANNELS]; /* bits per code (1..8) */
uint16_t range_mask[ICESND_MAX_CHANNELS]; /* (1 << range_bits) - 1 */
uint8_t curr_byte; /* last byte taken from outbuf */
uint8_t spf; /* not needed to decode though */
uint8_t channels;
int zlib_init; /* non-zero while strm is open */
/* extra */
uint32_t outbuf_max; /* original has all data in memory, but we read zlib in chunks that may be smaller */
} range_handle_t;
/* Allocate a range decoder with its base values cleared (the rest is set on block setup/reset).
 * Returns NULL if memory can't be allocated. */
static range_handle_t* range_decoder_open() {
    range_handle_t* handle = malloc(sizeof(*handle)); /* not calloc'd, fields are set by hand */
    if (handle == NULL)
        return NULL;

    handle->zlib_init = 0;
    handle->spf = 0;
    handle->frame_codes = 0;
    handle->max_samples = 0;
    handle->inbuf_size = 0;
    handle->inbuf = NULL;

    return handle;
}
/* Close the zlib stream (if open) and free the decoder; NULL is ignored. */
static void range_decoder_close(range_handle_t* ctx) {
    if (ctx) {
        zlib_end(&ctx->strm, &ctx->zlib_init);
        free(ctx);
    }
}
/* Restart decoding of the current block: reopens zlib over inbuf and clears decoding state.
 * Returns ICESND_RESULT_OK or zlib_init's error (state is left untouched in that case). */
static int range_decoder_reset(range_handle_t* ctx) {
    const int res = zlib_init(&ctx->strm, &ctx->zlib_init, ctx->inbuf, ctx->inbuf_size);
    if (res < ICESND_RESULT_OK)
        return res;

    /* pos past max forces a zlib read on the first byte (OG: sizeof(ctx->outbuf)) */
    ctx->outbuf_max = 0;
    ctx->outbuf_pos = 0xFFFFFFFF;

    ctx->bitpos = 0;
    ctx->codes_left = 0;
    ctx->samples_done = 0;

    return ICESND_RESULT_OK;
}
/* Point the decoder to a new block (buf/buf_size) of max_samples, copying needed config from
 * the entry, then reset its state. Returns ICESND_RESULT_OK or range_decoder_reset's error. */
static int range_decoder_block_setup(range_handle_t* ctx, const uint8_t* buf, int buf_size, bigrp_entry_t* etr, int max_samples) {
    /* block data */
    ctx->inbuf = buf;
    ctx->inbuf_size = buf_size;
    ctx->max_samples = max_samples;

    /* entry config */
    ctx->channels = etr->channels;
    ctx->spf = etr->spf;
    ctx->frame_codes = etr->frame_codes;

    return range_decoder_reset(ctx);
}
/* Disabled sketch of a streaming input refill: once zlib has consumed all input, it reads the
 * next chunk (up to buf_size, limited by data_left) through the read callback and hands it to
 * zlib. Not compiled; see range_load_byte where the call is commented out. */
#if 0
static void fill_zlib(z_stream* strm, data_t* data) {
/* zlib sets this to 0 once consumed */
if (strm->avail_in > 0)
return;
/* buffer is smaller than size, so this will be called N times */
if (data->data_left) {
int bytes = data->buf_size;
if (bytes > data->data_left)
bytes = data->data_left;
strm->next_in = data->buf;
strm->avail_in = data->cb.read(data->buf, 1, bytes, data->cb.arg);
data->data_left -= strm->avail_in;
}
}
#endif
/* get next byte from the zlib stream */
/* Put the next decompressed byte into curr_byte, inflating another chunk into outbuf once
 * all previously inflated bytes have been consumed. */
static void range_load_byte(range_handle_t* ctx) {
    const int needs_refill = ctx->outbuf_pos >= ctx->outbuf_max; /* OG: >= sizeof(ctx->outbuf) */

    if (needs_refill) {
        z_stream* zs = &ctx->strm;

        zs->next_out = ctx->outbuf;
        zs->avail_out = sizeof(ctx->outbuf);

        /* OG: no error control (shouldn't matter) */
        inflate(zs, Z_NO_FLUSH);

        /* may have inflated less than the full buffer */
        ctx->outbuf_max = sizeof(ctx->outbuf) - zs->avail_out;
        ctx->outbuf_pos = 0;
    }

    ctx->curr_byte = ctx->outbuf[ctx->outbuf_pos++];
}
/* read 24-bit header, decompressing bytes if necessary */
/* Read a channel's 24-bit LE frame header (decompressing bytes if necessary) and unpack it:
 * bits 0..2 = code bits - 1, bits 3..13 = range min, bits 14..23 = range max. */
static void range_load_header(range_handle_t* ctx, int ch) {
    uint32_t header = 0;
    int i;

    /* lowest byte first */
    for (i = 0; i < 3; i++) {
        range_load_byte(ctx);
        header |= (uint32_t)ctx->curr_byte << (8 * i);
    }

    /* quantized bits for this frame's codes */
    ctx->range_bits[ch] = (header & 7) + 1;
    ctx->range_mask[ch] = (1 << ctx->range_bits[ch]) - 1;

    /* signed range, stored as the upper bits of a 16-bit value */
    ctx->range_min[ch] = (header >> 3) << 5;  /* & 0xFFE0, upper 11 bits (signed) */
    ctx->range_max[ch] = (header >> 14) << 6; /* & 0xFFC0, upper 10 bits (signed) */
}
/* decode next bitstream's code, decompressing bytes if necessary */
/* Decode the next code for a channel (decompressing bytes if necessary) and map it to a
 * sample within the channel's current range. Codes are packed LSB first and may straddle
 * two bytes. */
static int16_t range_get_sample(range_handle_t* ctx, int ch) {
    const int bits = ctx->range_bits[ch];
    const int mask = ctx->range_mask[ch];
    int bits_end;
    int32_t code;
    int16_t delta;

    /* previous code ended on a byte boundary (or frame just started) */
    if (ctx->bitpos == 0)
        range_load_byte(ctx);

    /* get next code of N-bits (doesn't seem to use an actual bitstream class) */
    code = (ctx->curr_byte >> ctx->bitpos) & mask;
    bits_end = ctx->bitpos + bits;

    if (bits_end <= 8) {
        ctx->bitpos = bits_end & 7;
    }
    else {
        /* upper part of the code is in the next byte */
        range_load_byte(ctx);
        code |= (ctx->curr_byte << (8 - ctx->bitpos)) & mask;
        ctx->bitpos = bits_end - 8;
    }

    /* position within range, then final sample (no clamp) */
    delta = code * (ctx->range_max[ch] - ctx->range_min[ch]) / mask;
    return ctx->range_min[ch] + delta;
}
/* decode N samples and copy to sbuf.
* Internally decodes N samples at a time, and if asked for non-multiple number of samples it'll
* stop and resume properly from last copied sample of those 16. Return 1 if no more samples left. */
/* Decode up to max_done samples of the current block into sbuf (interleaved).
 * Frames are decoded a code at a time, so decoding can stop mid-frame and resume from there
 * on the next call.
 *
 * ctx: decoder after range_decoder_block_setup
 * sbuf: output, must have room for max_done * channels samples
 * max_done: max samples to decode in this call
 * p_done: receives the number of samples actually decoded
 *
 * Returns 1 if the block has no more samples left, 0 otherwise. */
static int range_decoder_decode(range_handle_t* ctx, int16_t* sbuf, const int max_done, int* p_done) {
    int ch;

    *p_done = 0;

    /* nothing requested: the loop below would still write 1 sample before checking max_done */
    if (max_done <= 0)
        return ctx->samples_done >= ctx->max_samples;

    /* header allows frame_codes = 0 (DCT value), but with it frames never hold any code and
     * the loop below wouldn't advance; treat the block as finished rather than spinning */
    if (ctx->frame_codes <= 0)
        return 1;

    while (ctx->samples_done < ctx->max_samples) {
        /* read frame header */
        if (ctx->codes_left == 0) {
            for (ch = 0; ch < ctx->channels; ch++) {
                range_load_header(ctx, ch);
            }

            /* last frame may be shorter */
            ctx->codes_left = ctx->frame_codes;
            if (ctx->samples_done + ctx->frame_codes > ctx->max_samples)
                ctx->codes_left = ctx->max_samples - ctx->samples_done;
            ctx->bitpos = 0;
        }

        /* decode frame samples */
        while (ctx->codes_left) {
            for (ch = 0; ch < ctx->channels; ch++) {
                *sbuf++ = range_get_sample(ctx, ch);
            }
            ctx->samples_done++;
            ctx->codes_left--;

            (*p_done)++;
            if (*p_done >= max_done) /* samples left */
                return ctx->samples_done >= ctx->max_samples; /* block done */
        }
    }

    return ctx->samples_done >= ctx->max_samples; /* block done */
}
/* ************************************************************ */
/* DCT */
/* ************************************************************ */
/* Inti Creates's "dct" decoder, internally IceSoundCodecDecoderDCT class, a pretty simple DCT codec.
*
* Header stores one codebook per band (max 16 * channels) of quantized bits, in zlibbed chunks of 4-bit nibbles.
* Data is just a bitstream of up to 16 bands (L then R) of variable bit codes (max 16-bit) that depend on previous
* 16 bands, dequantized using iDCT. Uses mid-side stereo but otherwise no other features like frames, scalefactors
* or overlaps/delay. Samples are encoded directly as +-32768, 16 at a time.
*/
#define DCT_MAX_BANDS 16
#define DCT_MAX_TRANSFORM 8
#define DCT_MAX_PREV 4
#define DCT_MAX_PREV_MASK 0x3
#define DCT_CODEBOOK_BUFFER 0x100
/* Simple LSB-first bitreader over a memory buffer (see dct_bitreader_get). */
typedef struct {
const uint8_t* buf; /* data being read (not owned) */
int bitpos; /* current position, in bits */
int bitstart; /* initial position in bits (always 0 in this version) */
int max_bits; /* buffer size in bits */
} dct_bitreader_t;
/* Header at the start of a DCT block, as read by dct_codeinfo_parse (little-endian).
 * Offsets in comments are relative to the block start. */
typedef struct {
uint32_t table_size; /* 0x00: rejected if bigger than 0x114 */
uint8_t init_scale; /* 0x04: multiplier for the base transform coefs */
uint8_t bands; /* 0x05: 1..DCT_MAX_BANDS */
uint8_t channels; /* 0x06: 1..ICESND_MAX_CHANNELS */
uint8_t unused; /* 0x07: must be 0 */
uint32_t max_samples; /* 0x08: samples in this block */
uint32_t cbk_offset[ICESND_MAX_CHANNELS][DCT_MAX_BANDS]; /* 0x0c: codebook starts, from block start */
uint32_t cbk_size[ICESND_MAX_CHANNELS][DCT_MAX_BANDS]; /* 0x8c: codebook sizes (passed to zlib as input size) */
uint32_t data_start; /* 0x10c: code data start, from block start */
uint32_t data_size; /* 0x110: code data size */
} dct_codeinfo_t;
/* One band's zlibbed codebook: inflated in small chunks into outbuf, then read 4 bits at a time. */
typedef struct {
z_stream strm;
uint8_t outbuf[DCT_CODEBOOK_BUFFER]; /* last inflated chunk */
dct_bitreader_t br; /* reader over the valid part of outbuf */
int zlib_init; /* non-zero while strm is open */
} dct_codebook_t;
/* State of the "dct" decoder for the current block (set up via dct_decoder_block_setup). */
typedef struct {
dct_codeinfo_t codeinfo_mem; /* OG code just casts a pointer to this, keep struct around */
dct_codeinfo_t* codeinfo; /* points to codeinfo_mem once a block is set up */
int samples_done; /* samples output so far in this block */
dct_codebook_t codebook[ICESND_MAX_CHANNELS][DCT_MAX_BANDS];
dct_bitreader_t br; /* reader over the block's code data */
float transform[DCT_MAX_TRANSFORM][DCT_MAX_BANDS]; /* per-block table, recalculated in dct_decoder_block_setup */
float unused[DCT_MAX_TRANSFORM][DCT_MAX_BANDS]; /* ? */
int16_t spectra[ICESND_MAX_CHANNELS][DCT_MAX_PREV][DCT_MAX_BANDS]; /* current + previous band values (circular) */
int spectra_curr; /* current slot in spectra (0..DCT_MAX_PREV-1) */
int16_t sbuf_tmp[DCT_MAX_BANDS * ICESND_MAX_CHANNELS]; /* interleaved */
float scales[16]; /* per-band multiplier, set to 1.0 on open */
} dct_handle_t;
/* ****************************** */
/* IceSoundCodecDecoder(Bitreader) */
/* OG bitreader uses u32 buf and reads u32le at a time (to simplify shifting), and aligns for 32-bit,
* since blocks pointer can start in the middle (whole file loaded in memory + non-padded blocks) */
/* Clear the bitreader so it has no data (dct_bitreader_is_over returns true). */
static void dct_bitreader_init(dct_bitreader_t* ctx) {
    ctx->max_bits = 0;
    ctx->bitstart = 0;
    ctx->bitpos = 0;
    ctx->buf = NULL;
}
/* Start reading bits from buf (buf_size bytes).
 * OG code aligns the pointer down to a 32-bit boundary and starts at bit '8 * (address & 3)',
 * since it reads u32 at a time; this version reads bytes so any alignment is ok. */
static void dct_bitreader_set(dct_bitreader_t* ctx, const uint8_t* buf, int buf_size) {
    const int first_bit = 8 * 0; /* non-aligned ok */

    ctx->buf = buf;
    ctx->max_bits = 8 * buf_size;
    ctx->bitstart = first_bit;
    ctx->bitpos = first_bit;
}
/* Returns non-zero once all bits of the current buffer have been read. */
static int dct_bitreader_is_over(dct_bitreader_t* ctx) {
    const int bits_end = ctx->bitstart + ctx->max_bits;

    return ctx->bitpos >= bits_end;
}
/* Read the next 'bits' bits (LSB first, LE byte order) and advance the reader.
 * Returns 0 without advancing if fewer than 'bits' bits are left (or bits is not in 0..32).
 *
 * OG bitreader loads u32le at a time; here bytes are accumulated instead. The accumulator is
 * 64-bit and bytes are widened before shifting, as shifting the promoted ints directly
 * overflows for reads that span 4+ bytes (same for the mask at 32 bits). */
static uint32_t dct_bitreader_get(dct_bitreader_t* ctx, int bits) {
    uint64_t code64 = 0;
    uint32_t mask;
    int pos, shift, nbytes, i;

    if (bits < 0 || bits > 32)
        return 0;
    if (ctx->bitpos + bits > ctx->bitstart + ctx->max_bits) /* ? */
        return 0;

    pos = ctx->bitpos >> 3;
    shift = ctx->bitpos & 0x7; /* within u8 */

    /* only load bytes that hold requested bits (never past the last valid byte) */
    nbytes = (shift + bits + 7) >> 3;
    for (i = 0; i < nbytes; i++) {
        code64 |= (uint64_t)ctx->buf[pos + i] << (8 * i);
    }
    code64 >>= shift;

    ctx->bitpos += bits;

    mask = (bits >= 32) ? 0xFFFFFFFFu : ((1u << bits) - 1u);
    return (uint32_t)code64 & mask;
}
/* ****************************** */
/* IceSoundCodecDecoder(Codebook) */
/* Set a codebook's base values: zlib stream marked as closed and an empty bitreader.
 * Codebooks are part of dct_handle_t, so nothing is allocated here. */
static void dct_codebook_init(dct_codebook_t* ctx) {
    ctx->zlib_init = 0;
    dct_bitreader_init(&ctx->br);
}
/* Close the codebook's zlib stream if open; NULL is ignored.
 * Codebooks are part of dct_handle_t, so nothing is freed here. */
static void dct_codebook_close(dct_codebook_t* ctx) {
    if (ctx != NULL) {
        zlib_end(&ctx->strm, &ctx->zlib_init);
    }
}
/* Restart a codebook over its zlibbed chunk (buf/buf_size) and drop any buffered bits, so
 * the next read inflates from the start. Returns ICESND_RESULT_OK or zlib_init's error. */
static int dct_codebook_reset(dct_codebook_t* ctx, const uint8_t* buf, int buf_size) {
    const int res = zlib_init(&ctx->strm, &ctx->zlib_init, buf, buf_size);
    if (res < ICESND_RESULT_OK)
        return res;

    dct_bitreader_init(&ctx->br);
    return ICESND_RESULT_OK;
}
/* Read next quantized value's bits from zlibbed codebook. Data is in LSB order and codes 4-bits, like:
* 0x10,0x32,0x54,0x76,0x98... = 0 1 2 3 4 5 6 7 8 9... */
/* Get the next code's quantized bits (4-bit value) from the codebook, inflating another chunk
 * when the buffered one is used up. */
static uint8_t dct_codebook_get_qbits(dct_codebook_t* ctx) {
    dct_bitreader_t* br = &ctx->br;

    if (dct_bitreader_is_over(br)) {
        z_stream* zs = &ctx->strm;
        int inflated;

        zs->next_out = ctx->outbuf;
        zs->avail_out = sizeof(ctx->outbuf);

        /* OG: no error control (shouldn't matter) */
        inflate(zs, Z_NO_FLUSH);

        inflated = sizeof(ctx->outbuf) - zs->avail_out;
        dct_bitreader_set(br, ctx->outbuf, inflated);
    }

    return (uint8_t)dct_bitreader_get(br, 4);
}
/* *********************** */
/* IceSoundCodecDecoderDCT */
/* Allocate a DCT decoder and set its base values (codebooks closed, empty bitreader and
 * scales at 1.0); the rest is set on block setup/reset. Returns NULL if memory can't be allocated. */
static dct_handle_t* dct_decoder_open() {
    dct_handle_t* handle = malloc(sizeof(*handle)); /* not calloc'd, fields are set by hand */
    int n;

    if (handle == NULL)
        return NULL;

    handle->codeinfo = NULL;

    /* codebooks are a [channels][bands] array, init them all in order */
    for (n = 0; n < ICESND_MAX_CHANNELS * DCT_MAX_BANDS; n++) {
        dct_codebook_init(&handle->codebook[n / DCT_MAX_BANDS][n % DCT_MAX_BANDS]);
    }

    dct_bitreader_init(&handle->br);

    for (n = 0; n < DCT_MAX_BANDS; n++) {
        handle->scales[n] = 1.0f;
    }

    return handle;
}
/* Close all codebooks' zlib streams and free the decoder; NULL is ignored (like the
 * range decoder's close). */
static void dct_decoder_close(dct_handle_t* ctx) {
    int ch, band;

    if (!ctx)
        return;

    /* close all codebook's zlib streams (even if not used, since we can close before anything is set) */
    for (ch = 0; ch < ICESND_MAX_CHANNELS; ch++) {
        for (band = 0; band < DCT_MAX_BANDS; band++) {
            dct_codebook_t* codebook = &ctx->codebook[ch][band];
            dct_codebook_close(codebook);
        }
    }

    free(ctx);
}
/* Restart decoding of the current block: reopens the codebooks in use, points the bitreader
 * to the code data and clears previous band values.
 * OG code doesn't pass buf (since reset is a virtual method), but rather codeinfo doubles as
 * a pointer to data start.
 * Returns ICESND_RESULT_OK, or the first codebook error (remaining state isn't reset then). */
static int dct_decoder_reset(dct_handle_t* ctx, const uint8_t* buf) {
    dct_codeinfo_t* ci = ctx->codeinfo;
    int ch, band;

    /* (re)open zlib streams of the codebooks used by this block */
    for (ch = 0; ch < ci->channels; ch++) {
        for (band = 0; band < ci->bands; band++) {
            int res = dct_codebook_reset(&ctx->codebook[ch][band],
                    buf + ci->cbk_offset[ch][band], ci->cbk_size[ch][band]);
            if (res < ICESND_RESULT_OK)
                return res;
        }
    }

    dct_bitreader_set(&ctx->br, buf + ci->data_start, ci->data_size);

    ctx->spectra_curr = 0;
    ctx->samples_done = 0;
    memset(ctx->spectra, 0, sizeof(ctx->spectra));

    return ICESND_RESULT_OK;
}
/* transform spectrum into samples (iDCT) */
/* Transform a channel's current spectrum (slot 'pos') into 16 samples (iDCT), written
 * interleaved into sbuf_tmp at this channel's positions.
 *
 * Each band adds its contribution to all 16 outputs. Depending on the band's lowest set bit
 * the contribution repeats (mirrored and/or negated) every 16, 8, 4 or 2 outputs, so only
 * 8, 4, 2 or 1 transform values are needed per band (optimized butterfly ops?). */
static void dct_decoder_transform(dct_handle_t* ctx, int16_t* sbuf_tmp, int channel, int pos) {
    dct_codeinfo_t* ci = ctx->codeinfo;
    float out[16] = {0}; /* no need to init as it's written in band 0 but gcc complains */
    int band, k, n;

    for (band = 0; band < ci->bands; band++) {
        /* scales seems fixed to 1.0, maybe a remnant */
        const float coef = (float)ctx->spectra[channel][pos][band] * ctx->scales[band];
        float term;

        switch (band) {
            case 0: /* bits 0000: same value for all outputs */
                term = ctx->transform[0][band] * coef;
                for (n = 0; n < 16; n++) {
                    out[n] = term;
                }
                break;

            case 1: case 3: case 5: case 7:
            case 9: case 11: case 13: case 15: /* bits xxx1: 8 values, 2nd half negated + mirrored */
                for (k = 0; k < 8; k++) {
                    term = ctx->transform[k][band] * coef;
                    out[k]      += term;
                    out[15 - k] -= term;
                }
                break;

            case 2: case 6: case 10: case 14: /* bits xx10: 4 values */
                for (k = 0; k < 4; k++) {
                    term = ctx->transform[k][band] * coef;
                    out[k]      += term;
                    out[7 - k]  -= term;
                    out[8 + k]  -= term;
                    out[15 - k] += term;
                }
                break;

            case 4: case 12: /* bits x100: 2 values */
                for (k = 0; k < 2; k++) {
                    term = ctx->transform[k][band] * coef;
                    out[k]      += term;
                    out[3 - k]  -= term;
                    out[4 + k]  -= term;
                    out[7 - k]  += term;
                    out[8 + k]  += term;
                    out[11 - k] -= term;
                    out[12 + k] -= term;
                    out[15 - k] += term;
                }
                break;

            case 8: /* bits 1000: 1 value, signs go + - - + + - - + ... */
                term = ctx->transform[0][band] * coef;
                for (n = 0; n < 16; n++) {
                    if (((n + 1) >> 1) & 1)
                        out[n] -= term;
                    else
                        out[n] += term;
                }
                break;

            default:
                break;
        }
    }

    /* copy float samples to sbuf samples, interleaved (L: 0,2,4,6,8... R:1,3,5,7...) */
    for (n = 0; n < DCT_MAX_BANDS; n++) {
        sbuf_tmp[channel + ci->channels * n] = (int16_t)roundf(out[n]); /* no clamp */
    }
}
/* read current code from the bitstream, in LE byte order */
/* Read the current band's code of 'qbits' bits from the bitstream and convert it to a
 * signed value. */
static int16_t dct_decoder_get_code(dct_handle_t* ctx, uint8_t qbits) {
    uint32_t raw;
    int16_t code; /* also ok as int32 */

    if (qbits == 0) {
        /* no resolution: 1-bit where 0 = 0 and 1 = -1 */
        raw = dct_bitreader_get(&ctx->br, 1);
        return (int16_t)(-(int16_t)raw);
    }

    raw = dct_bitreader_get(&ctx->br, qbits);
    code = raw; /* qbits max 0..15 */

    /* negative encoding: values in the lower half are moved below zero */
    if (code < (1 << (qbits - 1)))
        code = code - (1 << qbits);

    return code;
}
/* read codes for this channel */
/* Read this channel's codes for all bands and calculate the current spectrum (slot 'pos'),
 * where each band's value depends on the values of the 2 previous spectra. */
static void dct_decoder_dequantize(dct_handle_t* ctx, int channel, int pos) {
    dct_codeinfo_t* ci = ctx->codeinfo;
    /* circular buffer of DCT_MAX_PREV slots */
    const int16_t* prev2 = ctx->spectra[channel][(pos - 2) & DCT_MAX_PREV_MASK];
    const int16_t* prev1 = ctx->spectra[channel][(pos - 1) & DCT_MAX_PREV_MASK];
    int16_t* curr = ctx->spectra[channel][pos];
    int band;

    for (band = 0; band < ci->bands; band++) {
        /* next code's resolution (common 7~10 bits, sometimes 12 too), from the band's codebook */
        const uint8_t qbits = dct_codebook_get_qbits(&ctx->codebook[channel][band]);
        /* code itself, from the main bitstream (also ok as int32) */
        const int16_t code = dct_decoder_get_code(ctx, qbits);

        /* final value based on previous */
        curr[band] = code + (int16_t)(2 * prev1[band]) - prev2[band];
    }
}
/* restore L/R bands based on mid channel + side differences, ratio 1.0 + copy to final buffer */
/* Restore L/R samples from mid + side samples (ratio 1.0): reads interleaved pairs from
 * sbuf_tmp and writes L = 1st + 2nd, R = 1st - 2nd into the decoder's own sbuf_tmp. */
static void dct_decoder_ms_stereo(dct_handle_t* ctx, int16_t* sbuf_tmp) {
    dct_codeinfo_t* ci = ctx->codeinfo;
    int n;

    for (n = 0; n < DCT_MAX_BANDS; n++) {
        const int at = ci->channels * n;
        const int16_t mid = sbuf_tmp[at + 0];
        const int16_t side = sbuf_tmp[at + 1];

        ctx->sbuf_tmp[at + 0] = mid + side;
        ctx->sbuf_tmp[at + 1] = mid - side;
    }
}
/* decode N samples and copy to sbuf.
* Internally decodes 16 samples at a time, and if asked for non-multiple number of samples it'll
* stop and resume properly from last copied sample of those 16. Return 1 if no more samples left. */
/* Decode up to max_done samples of the current block into sbuf (interleaved), storing the
 * decoded count in p_done. Samples are decoded 16 at a time into the decoder's buffer, so
 * when asked for a non-multiple of 16 the rest is kept and copied on the next call.
 * Returns 1 if the block has no more samples left, 0 otherwise. */
static int dct_decoder_decode(dct_handle_t* ctx, int16_t* sbuf, const int max_done, int* p_done) {
    int16_t stereo_tmp[DCT_MAX_BANDS * ICESND_MAX_CHANNELS]; /* interleaved */
    dct_codeinfo_t* ci = ctx->codeinfo;
    int16_t* work;
    int pending;
    int ch;

    *p_done = 0;

    /* don't go past the block's samples */
    pending = max_done;
    if (pending > ci->max_samples - ctx->samples_done)
        pending = ci->max_samples - ctx->samples_done;

    /* 1ch transforms directly into the final buffer, 2ch uses a tmp buffer to handle MS stereo */
    work = (ci->channels == 1) ? ctx->sbuf_tmp : stereo_tmp;

    while (ctx->samples_done < ci->max_samples && pending) {
        /* could be non-zero if a previous max_done was a non-multiple of 16 */
        const int frame_pos = ctx->samples_done & 0xF;
        int count;

        /* decode next 16 samples when starting a new group */
        if (frame_pos == 0) {
            for (ch = 0; ch < ci->channels; ch++) {
                dct_decoder_dequantize(ctx, ch, ctx->spectra_curr);
                dct_decoder_transform(ctx, work, ch, ctx->spectra_curr);
            }
            ctx->spectra_curr = (ctx->spectra_curr + 1) & DCT_MAX_PREV_MASK; /* 0..3 and back to 0 */

            if (ci->channels == 2)
                dct_decoder_ms_stereo(ctx, work);
        }

        /* copy what's left of the group (or less, if fewer samples are wanted) to output sbuf */
        count = 16 - frame_pos;
        if (count > pending)
            count = pending;

        memcpy(sbuf, &ctx->sbuf_tmp[frame_pos * ci->channels], sizeof(int16_t) * ci->channels * count);

        sbuf += count * ci->channels;
        *p_done += count;
        pending -= count;
        ctx->samples_done += count;
    }

    return ctx->samples_done >= ci->max_samples; /* block done */
}
/* OG code casts buffer to this struct, read in a more portable fashion */
/* Read the codeinfo header at the start of a DCT block (OG code casts buffer to this struct,
 * read in a more portable fashion) and validate it against the block size.
 *
 * ci: output header
 * buf/buf_size: block start and size; offsets in the header are relative to buf
 *
 * Returns ICESND_RESULT_OK, or ICESND_ERROR_SETUP if the block is too small, values aren't
 * within known limits, or the code data / any used codebook falls outside the block. */
static int dct_codeinfo_parse(dct_codeinfo_t* ci, const uint8_t* buf, int buf_size) {
    int ch, i, pos;

    if (buf_size < 0x114)
        goto fail;

    ci->table_size  = get_u32le(buf + 0x00);
    ci->init_scale  = get_u8   (buf + 0x04);
    ci->bands       = get_u8   (buf + 0x05);
    ci->channels    = get_u8   (buf + 0x06);
    ci->unused      = get_u8   (buf + 0x07);
    ci->max_samples = get_u32le(buf + 0x08);

    /* tables always have max channels * bands entries, even if not all are used */
    pos = 0x0c;
    for (ch = 0; ch < ICESND_MAX_CHANNELS; ch++) {
        for (i = 0; i < DCT_MAX_BANDS; i++) {
            ci->cbk_offset[ch][i] = get_u32le(buf + pos);
            pos += 0x04;
        }
    }
    for (ch = 0; ch < ICESND_MAX_CHANNELS; ch++) {
        for (i = 0; i < DCT_MAX_BANDS; i++) {
            ci->cbk_size[ch][i] = get_u32le(buf + pos);
            pos += 0x04;
        }
    }

    ci->data_start = get_u32le(buf + 0x10c);
    ci->data_size  = get_u32le(buf + 0x110);

    if (ci->table_size > 0x114)
        goto fail;
    if (ci->bands < 1 || ci->bands > DCT_MAX_BANDS)
        goto fail;
    if (ci->channels < 1 || ci->channels > ICESND_MAX_CHANNELS)
        goto fail;
    if (ci->unused != 0x00)
        goto fail;

    /* sums are done in 64-bit, since big 32-bit values could wrap around and pass the check */
    if ((uint64_t)ci->data_start + ci->data_size > (uint64_t)buf_size)
        goto fail;

    /* codebooks in use are read by zlib straight from the block, so they must be within it too */
    for (ch = 0; ch < ci->channels; ch++) {
        for (i = 0; i < ci->bands; i++) {
            if ((uint64_t)ci->cbk_offset[ch][i] + ci->cbk_size[ch][i] > (uint64_t)buf_size)
                goto fail;
        }
    }

    return ICESND_RESULT_OK;
fail:
    return ICESND_ERROR_SETUP;
}
/* base DCT unique coefs, used below to init the full table (see opus' analysis.c) */
/* Base coefs; multiplied by the block's init_scale before building the transform table. */
static const float DCT_TRANSFORM_COEFS[16] = {
0.25f, 0.35185099f, 0.34676f, 0.33832899f,
0.32664099f, 0.31180599f, 0.29396901f, 0.27329999f,
0.25f, 0.224292f, 0.19642401f, 0.166664f,
0.135299f, 0.102631f, 0.068975002f, 0.034653999f,
};
/* Per-band multiplier applied to every transform value of that band. */
static const float DCT_TRANSFORM_SCALES[16] = {
4.0, 6.0, 8.0, 10.0, 12.0, 12.0, 13.0, 15.0,
16.0, 16.0, 20.0, 24.0, 28.0, 35.0, 41.0, 41.0
};
/* Number of transform values calculated per band (the rest are never read by the iDCT). */
static const int DCT_TRANSFORM_STEPS[16] = {
1, 8, 4, 8, 2, 8, 4, 8,
1, 8, 4, 8, 2, 8, 4, 8,
};
/* re-calculate DCT table, that depends on a current intro/body chunk's scale value */
/* Prepare the decoder for a new block: parses its codeinfo header, re-calculates the DCT
 * table (depends on the block's scale value) and resets decoding state.
 * Returns ICESND_RESULT_OK or the error from parsing/reset. */
static int dct_decoder_block_setup(dct_handle_t* ctx, const uint8_t* buf, int buf_size, bigrp_entry_t* etr) {
    dct_codeinfo_t* ci = &ctx->codeinfo_mem;
    float scaled[DCT_MAX_BANDS];
    float scale;
    int band, step;
    int res;

    /* portable init */
    res = dct_codeinfo_parse(ci, buf, buf_size);
    if (res < ICESND_RESULT_OK)
        return res;

    /* pre-calculate scaled coefs (mini optimization?) */
    scale = ci->init_scale;
    for (band = 0; band < DCT_MAX_BANDS; band++) {
        scaled[band] = DCT_TRANSFORM_COEFS[band] * scale;
    }

    /* transform for N=16, k=0..8? */
    for (band = 0; band < DCT_MAX_BANDS; band++) {
        const int steps = DCT_TRANSFORM_STEPS[band];
        int pos = band;

        for (step = 0; step < steps; step++) {
            /* position advances in odd multiples of band, over 4 "quadrants" of 16 coefs:
             * odd quadrants read the table backwards, quadrants 1 and 2 are negated */
            const int quadrant = (pos >> 4) & 3;
            const int low = pos & 0xF;
            const int index = (quadrant & 1) ? 16 - low : low;
            float coef = scaled[index];

            if (quadrant == 1 || quadrant == 2)
                coef = -coef;

            pos += 2 * band;

            /* OG somehow assigns the plain coef first, then this */
            ctx->transform[step][band] = DCT_TRANSFORM_SCALES[band] * coef;
        }
    }

    /* rest of setup */
    ctx->codeinfo = ci;

    return dct_decoder_reset(ctx, buf);
}
/* ************************************************************ */
/* API */
/* ************************************************************ */
/* (not part of original code (but partially inspired by IceSSoundEng::IcePlayer) */
#define ICESND_BIGRP_SIZE 0x10
#define ICESND_ENTRY_SIZE 0x34
#define ICESND_BUF_SIZE 0x10000
/* Main library handle: target entry's info plus the decoder in use. */
struct icesnd_handle_t {
/* config*/
int target_subsong; /* 1-based entry to decode */
icesnd_callback_t cb; /* copy of the user's callbacks/file buffer */
/* state */
bigrp_header_t hdr;
bigrp_entry_t etr;
void* decoder; /* range_handle_t* if is_range is set, dct_handle_t* otherwise */
int is_range;
int intro_init;
int body_init;
int intro_done;
/* absolute offset */
int intro_offset;
int body_offset;
uint8_t* blkbuf; /* block buffer, only allocated when reading through callbacks */
int blkbuf_size;
};
/* Read the main header and the target subsong's entry (through the read/seek callbacks if
 * set, otherwise from the in-memory file buffer), calculate absolute block offsets and,
 * when using callbacks, allocate a buffer big enough for the largest block.
 *
 * Returns ICESND_RESULT_OK, or ICESND_ERROR_SETUP on bad headers, out-of-range offsets or
 * sizes, or failed allocation. */
static int parse_header(icesnd_handle_t* ctx) {
    int err;
    uint8_t tmp[0x40];
    const uint8_t* buf;
    int buf_size;
    uint32_t offset;
    uint64_t entry_pos;

    /* read common header size */
    offset = 0x00;
    if (ctx->cb.read) {
        ctx->cb.seek(ctx->cb.arg, offset, SEEK_SET);
        buf_size = ctx->cb.read(tmp, 1, 0x10, ctx->cb.arg);
        buf = tmp;
    }
    else {
        buf_size = ctx->cb.filebuf_size;
        buf = ctx->cb.filebuf + offset;
    }

    err = bigrp_header_parse(&ctx->hdr, buf, buf_size, ctx->target_subsong);
    if (err < ICESND_RESULT_OK) goto fail;

    /* read target entry (position calculated in 64-bit, as entry_size * subsong could wrap
     * around with a big subsong count; offsets are later kept as int) */
    entry_pos = (uint64_t)ctx->hdr.head_size + (uint64_t)ctx->hdr.entry_size * (uint64_t)(ctx->target_subsong - 1);
    if (entry_pos > 0x7FFFFFFF) goto fail;
    offset = (uint32_t)entry_pos;

    if (ctx->cb.read) {
        ctx->cb.seek(ctx->cb.arg, offset, SEEK_SET);
        buf_size = ctx->cb.read(tmp, 1, ctx->hdr.entry_size, ctx->cb.arg); /* max 0x40, fits tmp */
        buf = tmp;
    }
    else {
        if (offset > ctx->cb.filebuf_size) goto fail;
        buf = ctx->cb.filebuf + offset;
        buf_size = ctx->cb.filebuf_size - offset;
    }

    err = bigrp_entry_parse(&ctx->etr, buf, buf_size);
    if (err < ICESND_RESULT_OK) goto fail;

    /* block offsets are relative to the entry */
    if (ctx->etr.codec == ICESND_CODEC_RANGE || ctx->etr.codec == ICESND_CODEC_DCT) {
        ctx->intro_offset = offset + ctx->etr.intro_offset;
        ctx->body_offset = offset + ctx->etr.body_offset;
    }

    //TODO fix library later
    // see comment at top, but basically format is rather annoying to adapt as a streaming
    // decoder, and ran out of time. For now it reads a whole blocks (intro/body) at once. Sorry!

    /* prepare buf */
    if (ctx->cb.read) {
        uint32_t block_size = ctx->etr.body_zsize;
        if (block_size < ctx->etr.intro_zsize)
            block_size = ctx->etr.intro_zsize;

        /* sizes are kept as int: reject values that (once padded) don't fit, as they would
         * wrap around into a small buffer that doesn't match the block's stated size */
        if (block_size > 0x7FFFFFF0)
            goto fail;

        if (block_size % 0x10 != 0) /* pad just in case */
            block_size = block_size + (0x10 - (block_size % 0x10));

        ctx->blkbuf_size = (int)block_size;
        ctx->blkbuf = malloc(block_size);
        if (!ctx->blkbuf) goto fail;
    }

    return ICESND_RESULT_OK;
fail:
    return ICESND_ERROR_SETUP;
}
/* Creates a decoding handle for `target_subsong`.
 *
 * `cb` must provide either a memory buffer (filebuf) or both read and seek callbacks; it is
 * copied into the handle. The header and target entry are parsed, and a range decoder (codec
 * 0x00) or DCT decoder (any other codec) is opened.
 *
 * Returns the new handle (to be released with icesnd_free), or NULL on any error. */
icesnd_handle_t* icesnd_init(int target_subsong, icesnd_callback_t* cb) {
    icesnd_handle_t* ctx = NULL;
    int err;

    /* validate callbacks before touching them (cb itself may be missing) */
    if (!cb)
        goto fail;
    if (!cb->filebuf && !(cb->read && cb->seek))
        goto fail;

    ctx = calloc(1, sizeof(icesnd_handle_t));
    if (!ctx) goto fail;

    ctx->target_subsong = target_subsong;
    ctx->cb = *cb; /* memcpy */

    err = parse_header(ctx);
    if (err < ICESND_RESULT_OK) goto fail;

    ctx->is_range = ctx->etr.codec == 0x00;
    if (ctx->is_range)
        ctx->decoder = range_decoder_open();
    else
        ctx->decoder = dct_decoder_open();
    if (!ctx->decoder) goto fail;

    icesnd_reset(ctx, 0);

    return ctx;
fail:
    icesnd_free(ctx); /* accepts NULL and partially initialized handles */
    return NULL;
}
/* Releases a handle: closes the decoder matching the handle's codec (if one was opened),
 * then frees the block buffer and the handle itself. NULL is accepted and ignored. */
void icesnd_free(icesnd_handle_t* ctx) {
    if (ctx != NULL) {
        if (ctx->decoder != NULL) {
            if (!ctx->is_range)
                dct_decoder_close(ctx->decoder);
            else
                range_decoder_close(ctx->decoder);
        }

        free(ctx->blkbuf);
        free(ctx);
    }
}
/* Copies stream info from the parsed header/entry into `info`.
 * The loop start is the intro length, and the total is intro + body samples.
 * Returns ICESND_RESULT_OK, or ICESND_ERROR_DECODE when ctx is NULL. */
int icesnd_info(icesnd_handle_t* ctx, icesnd_info_t* info) {
    if (ctx == NULL)
        return ICESND_ERROR_DECODE;

    /* from common header */
    info->total_subsongs = ctx->hdr.total_subsongs;

    /* from subsong entry */
    info->codec = ctx->etr.codec;
    info->sample_rate = ctx->etr.sample_rate;
    info->channels = ctx->etr.channels;

    /* body block is the looped part, so loop starts right after the intro */
    info->loop_start = ctx->etr.intro_samples;
    info->num_samples = ctx->etr.intro_samples + ctx->etr.body_samples;
    info->loop_flag = ctx->etr.loop_flag;

    return ICESND_RESULT_OK;
}
/* Rewinds playback state so the next icesnd_decode call sets up its block again.
 * The intro block is skipped when the entry has no intro samples or when `loop_start`
 * is non-zero. Does nothing without a handle or an opened decoder. */
void icesnd_reset(icesnd_handle_t* ctx, int loop_start) {
    int skip_intro;

    if (ctx == NULL || ctx->decoder == NULL)
        return;

    /* the decoder itself isn't reset here: that happens (and only works properly)
     * when a block is set up again */
    ctx->intro_init = 0;
    ctx->body_init = 0;

    /* skip intro block in some cases */
    skip_intro = (loop_start != 0) || (ctx->etr.intro_samples == 0);
    ctx->intro_done = skip_intro ? 1 : 0;
}
/* Loads one block (intro when `intro` is non-zero, body otherwise) and passes it to the decoder.
 *
 * In callback mode the block is read into ctx->blkbuf; in memory mode the decoder is pointed
 * at the block inside ctx->cb.filebuf (with all remaining bytes of the buffer as size).
 *
 * Returns ICESND_RESULT_OK, ICESND_ERROR_DECODE when the block doesn't fit the available
 * buffer, or the decoder's own error code. */
static int setup_block(icesnd_handle_t* ctx, int intro) {
    int err;
    const uint8_t* buf;
    int buf_size;
    int block_offset = (intro ? ctx->intro_offset : ctx->body_offset);
    int block_size = (intro ? ctx->etr.intro_zsize : ctx->etr.body_zsize);
    int block_samples = (intro ? ctx->etr.intro_samples : ctx->etr.body_samples);

    /* values come from the file: a negative size would pass the checks below and become a
     * huge read count, and a negative offset would point before the start of filebuf */
    if (block_offset < 0 || block_size < 0)
        return ICESND_ERROR_DECODE;

    if (ctx->cb.read) {
        /* could optimize by ignoring calls (intro > body > body...) but this kinda simulates streaming */
        if (block_size > ctx->blkbuf_size) /* can't happen but anyway */
            return ICESND_ERROR_DECODE;

        ctx->cb.seek(ctx->cb.arg, block_offset, SEEK_SET);
        buf_size = ctx->cb.read(ctx->blkbuf, 1, block_size, ctx->cb.arg);
        buf = ctx->blkbuf;
    }
    else {
        /* same test as "filebuf_size < offset + size", written so the sum can't overflow */
        if (block_offset > ctx->cb.filebuf_size || block_size > ctx->cb.filebuf_size - block_offset)
            return ICESND_ERROR_DECODE;

        buf = ctx->cb.filebuf + block_offset;
        buf_size = ctx->cb.filebuf_size - block_offset;
    }

    if (ctx->is_range) {
        err = range_decoder_block_setup(ctx->decoder, buf, buf_size, &ctx->etr, block_samples);
    }
    else {
        err = dct_decoder_block_setup(ctx->decoder, buf, buf_size, &ctx->etr); /* max_samples info is in codebook table */
    }
    if (err < ICESND_RESULT_OK) return err;

    return ICESND_RESULT_OK;
}
/* Decodes up to `max_samples` samples into `sbuf`.
 *
 * The current block (intro first unless skipped, then body) is set up on demand before
 * decoding. `sbuf` advances by samples_done * channels after each decoder call, so it must
 * hold max_samples * channels values. Decoding stops early at a block boundary so the caller
 * can handle loop points, meaning fewer samples than requested may be returned.
 *
 * Returns the number of samples decoded in this call, ICESND_ERROR_DECODE when ctx is NULL,
 * or the error code from setting up a block. */
int icesnd_decode(icesnd_handle_t* ctx, int16_t* sbuf, int max_samples) {
    int err;
    int samples_done, block_end;
    int samples_decoded = 0;

    if (!ctx)
        return ICESND_ERROR_DECODE;

    while (max_samples > 0) {
        if (!ctx->intro_done) {
            if (!ctx->intro_init) {
                err = setup_block(ctx, 1);
                if (err < ICESND_RESULT_OK) return err;
                ctx->intro_init = 1;
            }
        }
        else {
            if (!ctx->body_init) {
                err = setup_block(ctx, 0);
                /* must stop here too: otherwise a failed body setup is flagged as done and
                 * the decoder runs on a block that was never configured */
                if (err < ICESND_RESULT_OK) return err;
                ctx->body_init = 1;
            }
        }

        if (ctx->is_range)
            block_end = range_decoder_decode(ctx->decoder, sbuf, max_samples, &samples_done);
        else
            block_end = dct_decoder_decode(ctx->decoder, sbuf, max_samples, &samples_done);

        max_samples -= samples_done;
        samples_decoded += samples_done;
        sbuf += samples_done * ctx->etr.channels;
        //ctx->curr_sample += samples_done; /* original keeps this around to test if intro block is done */

        if (block_end) {
            /* after first block (could check if this is the first block but whatevs) */
            ctx->intro_done = 1;

            /* intro end, or after body end to allow loops on next calls */
            if (ctx->etr.loop_flag)
                ctx->body_init = 0;
        }

        /* could be possible on block end if not reset */
        if (samples_done == 0)
            break;

        /* stop on block boundary to ensure external caller may stop on loop end (could go on otherwise) */
        if (block_end)
            break;
    }

    return samples_decoded;
}