From c3f90b4608789fd8a9ce7ec9d16a86851d232bac Mon Sep 17 00:00:00 2001 From: Chris Moeller Date: Sun, 13 Oct 2013 21:38:58 -0700 Subject: [PATCH] Updated vio2sf --- .../vio2sf/vio2sf/src/vio2sf/desmume/MMU.c | 9 +- .../vio2sf/src/vio2sf/desmume/NDSSystem.c | 2 +- .../vio2sf/vio2sf/src/vio2sf/desmume/SPU.cpp | 190 +++++++++++---- .../vio2sf/vio2sf/src/vio2sf/desmume/SPU.h | 32 ++- .../src/vio2sf/desmume/arm_instructions.c | 8 +- .../vio2sf/vio2sf/src/vio2sf/desmume/bios.c | 5 +- .../vio2sf/vio2sf/src/vio2sf/desmume/isqrt.c | 41 ++++ .../vio2sf/vio2sf/src/vio2sf/desmume/isqrt.h | 9 + .../src/vio2sf/desmume/lanczos_resampler.c | 229 ++++++++++++++++++ .../src/vio2sf/desmume/lanczos_resampler.h | 26 ++ .../vio2sf/vio2sf/src/vio2sf/desmume/state.c | 2 +- .../vio2sf/vio2sf/src/vio2sf/desmume/state.h | 2 +- .../HighlyComplete/HCDecoder.mm | 2 +- 13 files changed, 498 insertions(+), 59 deletions(-) create mode 100644 Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/isqrt.c create mode 100644 Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/isqrt.h create mode 100644 Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/lanczos_resampler.c create mode 100644 Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/lanczos_resampler.h diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/MMU.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/MMU.c index 6f6d80f4a..25ad43c05 100755 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/MMU.c +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/MMU.c @@ -42,6 +42,7 @@ #include "cp15.h" //#include "wifi.h" #include "registers.h" +#include "isqrt.h" #if VIO2SF_GPU_ENABLE #include "render3D.h" @@ -1359,7 +1360,7 @@ void FASTCALL MMU_write16(NDS_state *state, u32 proc, u32 adr, u16 val) state->AUX_SPI_CMD = val & 0xFF; } - T1WriteWord(state->MMU->MMU_MEM[proc][(REG_AUXSPIDATA >> 20) & 0xff], REG_AUXSPIDATA & 0xfff, bm_transfer(&state->MMU->bupmem, val)); + T1WriteWord(state->MMU->MMU_MEM[proc][(REG_AUXSPIDATA >> 20) & 0xff], REG_AUXSPIDATA & 0xfff, bm_transfer(&state->MMU->bupmem, (u8)val)); return; case REG_SPICNT : @@ -1410,7 +1411,7 @@ void FASTCALL MMU_write16(NDS_state *state, u32 proc, u32 adr, u16 val) T1WriteWord(state->MMU->MMU_MEM[proc][(REG_SPIDATA >> 20) & 0xff], REG_SPIDATA & 0xfff, 0); break; } - T1WriteWord(state->MMU->MMU_MEM[proc][(REG_SPIDATA >> 20) & 0xff], REG_SPIDATA & 0xfff, fw_transfer(&state->MMU->fw, val)); + T1WriteWord(state->MMU->MMU_MEM[proc][(REG_SPIDATA >> 20) & 0xff], REG_SPIDATA & 0xfff, fw_transfer(&state->MMU->fw, (u8)val)); return; @@ -2559,7 +2560,7 @@ void FASTCALL MMU_write32(NDS_state *state, u32 proc, u32 adr, u32 val) case 1: return; } - T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4, (u32) sqrt((s64)v)); + T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4, (u32) isqrt64(v)); T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B0, cnt & 0x7FFF); SQRTLOG("BOUT1 sqrt(%08X%08X) = %08X\r\n", (u32)(v>>32), (u32)v, T1ReadLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4)); @@ -2580,7 +2581,7 @@ void FASTCALL MMU_write32(NDS_state *state, u32 proc, u32 adr, u32 val) v = T1ReadQuad(state->MMU->MMU_MEM[proc][0x40], 0x2B8); break; } - T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4, (u32) sqrt((s64)v)); + T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4, (u32) isqrt64(v)); T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B0, cnt & 0x7FFF); SQRTLOG("BOUT2 sqrt(%08X%08X) = %08X\r\n", (u32)(v>>32), (u32)v, T1ReadLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4)); diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/NDSSystem.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/NDSSystem.c index 01e42734a..8872a8947 100755 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/NDSSystem.c +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/NDSSystem.c @@ -268,7 +268,7 @@ void NDS_FreeROM(NDS_state *state) void NDS_Reset( NDS_state *state) { BOOL oldexecute=state->execute; - int i; + unsigned int i; u32 src; u32 dst; NDS_header * header = NDS_getROMHeader(state); diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/SPU.cpp b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/SPU.cpp index a120225f2..ae9b2ae9c 100755 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/SPU.cpp +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/SPU.cpp @@ -51,7 +51,7 @@ #include "state.h" //===================CONFIGURATION======================== -bool isChannelMuted(NDS_state *state, int num) { return state->dwChannelMute&(1<dwChannelMute&(1<dwInterpolation; } //========================================================= @@ -308,6 +308,12 @@ void SPU_struct::KeyOn(int channel) { channel_struct &thischan = channels[channel]; + if (spuInterpolationMode(state) == SPUInterpolation_Lanczos) + { + thischan.init_lanczos(); + lanczos_resampler_clear(thischan.lanczos_resampler); + } + adjust_channel_timer(&thischan); // LOG("Channel %d key on: vol = %d, datashift = %d, hold = %d, pan = %d, waveduty = %d, repeat = %d, format = %d, source address = %07X, timer = %04X, loop start = %04X, length = %06X, cpu->state->MMUARM7_REG[0x501] = %02X\n", channel, chan->vol, chan->datashift, chan->hold, chan->pan, chan->waveduty, chan->repeat, chan->format, chan->addr, chan->timer, chan->loopstart, chan->length, T1ReadByte(MMU->ARM7_REG, 0x501)); @@ -508,22 +514,11 @@ extern "C" void SPU_WriteLong(NDS_state *state, u32 addr, u32 val) } ////////////////////////////////////////////////////////////////////////////// -static FORCEINLINE s32 Interpolate(SPUInterpolationMode INTERPOLATE_MODE, s32 a, s32 b, double ratio) +static FORCEINLINE s32 Interpolate(s32 a, s32 b, double ratio) { - if(INTERPOLATE_MODE == SPUInterpolation_Cosine) - { - //why did we change it away from the lookup table? somebody should research that - ratio = ratio - sputrunc(ratio); - double ratio2 = ((1.0 - cos(ratio * M_PI)) * 0.5); - //double ratio2 = (1.0f - cos_lut[((int)(ratio*256.0))&0xFF]) / 2.0f; - return s32floor((float)(((1-ratio2)*a) + (ratio2*b))); - } - else - { - //linear interpolation - ratio = ratio - sputrunc(ratio); - return s32floor((float)((1-ratio)*a + ratio*b)); - } + //linear interpolation + ratio = ratio - sputrunc(ratio); + return s32floor((float)((1-ratio)*a + ratio*b)); } ////////////////////////////////////////////////////////////////////////////// @@ -532,15 +527,15 @@ double round(double r) return (r > 0.0) ? floor(r + 0.5) : ceil(r - 0.5); } -static FORCEINLINE void Fetch8BitData(SPUInterpolationMode INTERPOLATE_MODE, channel_struct *chan, s32 *data) +static FORCEINLINE void Fetch8BitDataInternal(SPUInterpolationMode INTERPOLATE_MODE, channel_struct *chan, s32 *data) { u32 loc = sputrunc(chan->sampcnt); - if(INTERPOLATE_MODE != SPUInterpolation_None) + if(INTERPOLATE_MODE == SPUInterpolation_Linear) { s32 a = (s32)(chan->buf8[loc] << 8); if(loc < (chan->totlength << 2) - 1) { s32 b = (s32)(chan->buf8[loc + 1] << 8); - a = Interpolate(INTERPOLATE_MODE, a, b, chan->sampcnt); + a = Interpolate(a, b, chan->sampcnt); } *data = a; } @@ -548,18 +543,18 @@ static FORCEINLINE void Fetch8BitData(SPUInterpolationMode INTERPOLATE_MODE, cha *data = (s32)chan->buf8[loc] << 8; } -static FORCEINLINE void Fetch16BitData(SPUInterpolationMode INTERPOLATE_MODE, const channel_struct * const chan, s32 *data) +static FORCEINLINE void Fetch16BitDataInternal(SPUInterpolationMode INTERPOLATE_MODE, const channel_struct * const chan, s32 *data) { const s16* const buf16 = chan->buf16; const int shift = 1; - if(INTERPOLATE_MODE != SPUInterpolation_None) + if(INTERPOLATE_MODE == SPUInterpolation_Linear) { u32 loc = sputrunc(chan->sampcnt); s32 a = (s32)buf16[loc], b; if(loc < (chan->totlength << shift) - 1) { b = (s32)buf16[loc + 1]; - a = Interpolate(INTERPOLATE_MODE,a, b, chan->sampcnt); + a = Interpolate(a, b, chan->sampcnt); } *data = a; } @@ -567,7 +562,7 @@ static FORCEINLINE void Fetch16BitData(SPUInterpolationMode INTERPOLATE_MODE, co *data = (s32)buf16[sputrunc(chan->sampcnt)]; } -static FORCEINLINE void FetchADPCMData(SPUInterpolationMode INTERPOLATE_MODE, channel_struct * const chan, s32 * const data) +static FORCEINLINE void FetchADPCMDataInternal(SPUInterpolationMode INTERPOLATE_MODE, channel_struct * const chan, s32 * const data) { // No sense decoding, just return the last sample if (chan->lastsampcnt != sputrunc(chan->sampcnt)){ @@ -594,8 +589,8 @@ static FORCEINLINE void FetchADPCMData(SPUInterpolationMode INTERPOLATE_MODE, ch chan->lastsampcnt = sputrunc(chan->sampcnt); } - if(INTERPOLATE_MODE != SPUInterpolation_None) - *data = Interpolate(INTERPOLATE_MODE,(s32)chan->pcm16b_last,(s32)chan->pcm16b,chan->sampcnt); + if(INTERPOLATE_MODE == SPUInterpolation_Linear) + *data = Interpolate((s32)chan->pcm16b_last,(s32)chan->pcm16b,chan->sampcnt); else *data = (s32)chan->pcm16b; } @@ -679,11 +674,18 @@ static FORCEINLINE void TestForLoop(NDS_state *state, int FORMAT, SPU_struct *SP } else { - chan->status = CHANSTAT_STOPPED; + if (!chan->lanczos_resampler || !lanczos_resampler_get_sample_count(chan->lanczos_resampler)) + { + chan->status = CHANSTAT_STOPPED; - if(SPU == state->SPU_core) - state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F; - SPU->bufpos = SPU->buflength; + if(SPU == state->SPU_core) + state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F; + SPU->bufpos = SPU->buflength; + } + else + { + chan->status = CHANSTAT_EMPTYBUFFER; + } } } } @@ -715,14 +717,117 @@ static FORCEINLINE void TestForLoop2(NDS_state *state, SPU_struct *SPU, channel_ } else { - chan->status = CHANSTAT_STOPPED; - if(SPU == state->SPU_core) - state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F; - SPU->bufpos = SPU->buflength; + if (!chan->lanczos_resampler || !lanczos_resampler_get_sample_count(chan->lanczos_resampler)) + { + chan->status = CHANSTAT_STOPPED; + if(SPU == state->SPU_core) + state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F; + SPU->bufpos = SPU->buflength; + } + else + { + chan->status = CHANSTAT_EMPTYBUFFER; + } } } } +static FORCEINLINE void Fetch8BitData(SPUInterpolationMode INTERPOLATE_MODE, NDS_state *state, SPU_struct* const SPU, channel_struct *chan, s32 *data) +{ + if (INTERPOLATE_MODE != SPUInterpolation_Lanczos) + return Fetch8BitDataInternal(INTERPOLATE_MODE, chan, data); + + double saved_inc = chan->sampinc; + chan->sampinc = 1.0; + + lanczos_resampler_set_rate( chan->lanczos_resampler, saved_inc ); + + while (chan->status != CHANSTAT_EMPTYBUFFER && lanczos_resampler_get_free_count(chan->lanczos_resampler)) + { + s32 sample; + Fetch8BitDataInternal(SPUInterpolation_None, chan, &sample); + TestForLoop(state, 0, SPU, chan); + lanczos_resampler_write_sample(chan->lanczos_resampler, sample); + } + + chan->sampinc = saved_inc; + + if (!lanczos_resampler_get_sample_count(chan->lanczos_resampler)) + { + chan->status = CHANSTAT_STOPPED; + if(SPU == state->SPU_core) + state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F; + SPU->bufpos = SPU->buflength; + } + + *data = lanczos_resampler_get_sample(chan->lanczos_resampler); + lanczos_resampler_remove_sample(chan->lanczos_resampler); +} + +static FORCEINLINE void Fetch16BitData(SPUInterpolationMode INTERPOLATE_MODE, NDS_state *state, SPU_struct* const SPU, channel_struct *chan, s32 *data) +{ + if (INTERPOLATE_MODE != SPUInterpolation_Lanczos) + return Fetch16BitDataInternal(INTERPOLATE_MODE, chan, data); + + double saved_inc = chan->sampinc; + chan->sampinc = 1.0; + + lanczos_resampler_set_rate( chan->lanczos_resampler, saved_inc ); + + while (chan->status != CHANSTAT_EMPTYBUFFER && lanczos_resampler_get_free_count(chan->lanczos_resampler)) + { + s32 sample; + Fetch16BitDataInternal(SPUInterpolation_None, chan, &sample); + TestForLoop(state, 1, SPU, chan); + lanczos_resampler_write_sample(chan->lanczos_resampler, sample); + } + + chan->sampinc = saved_inc; + + if (!lanczos_resampler_get_sample_count(chan->lanczos_resampler)) + { + chan->status = CHANSTAT_STOPPED; + if(SPU == state->SPU_core) + state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F; + SPU->bufpos = SPU->buflength; + } + + *data = lanczos_resampler_get_sample(chan->lanczos_resampler); + lanczos_resampler_remove_sample(chan->lanczos_resampler); +} + +static FORCEINLINE void FetchADPCMData(SPUInterpolationMode INTERPOLATE_MODE, NDS_state *state, SPU_struct* const SPU, channel_struct *chan, s32 *data) +{ + if (INTERPOLATE_MODE != SPUInterpolation_Lanczos) + return FetchADPCMDataInternal(INTERPOLATE_MODE, chan, data); + + double saved_inc = chan->sampinc; + chan->sampinc = 1.0; + + lanczos_resampler_set_rate( chan->lanczos_resampler, saved_inc ); + + while (chan->status != CHANSTAT_EMPTYBUFFER && lanczos_resampler_get_free_count(chan->lanczos_resampler)) + { + s32 sample; + FetchADPCMDataInternal(SPUInterpolation_None, chan, &sample); + TestForLoop2(state, SPU, chan); + lanczos_resampler_write_sample(chan->lanczos_resampler, sample); + } + + chan->sampinc = saved_inc; + + if (!lanczos_resampler_get_sample_count(chan->lanczos_resampler)) + { + chan->status = CHANSTAT_STOPPED; + if(SPU == state->SPU_core) + state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F; + SPU->bufpos = SPU->buflength; + } + + *data = lanczos_resampler_get_sample(chan->lanczos_resampler); + lanczos_resampler_remove_sample(chan->lanczos_resampler); +} + FORCEINLINE static void SPU_Mix(int CHANNELS, SPU_struct* SPU, channel_struct *chan, s32 data) { switch(CHANNELS) @@ -742,18 +847,21 @@ FORCEINLINE static void ____SPU_ChanUpdate(NDS_state *state, int CHANNELS, int F s32 data; switch(FORMAT) { - case 0: Fetch8BitData(INTERPOLATE_MODE, chan, &data); break; - case 1: Fetch16BitData(INTERPOLATE_MODE, chan, &data); break; - case 2: FetchADPCMData(INTERPOLATE_MODE, chan, &data); break; + case 0: Fetch8BitData(INTERPOLATE_MODE, state, SPU, chan, &data); break; + case 1: Fetch16BitData(INTERPOLATE_MODE, state, SPU, chan, &data); break; + case 2: FetchADPCMData(INTERPOLATE_MODE, state, SPU, chan, &data); break; case 3: FetchPSGData(chan, &data); break; } SPU_Mix(CHANNELS, SPU, chan, data); } - switch(FORMAT) { - case 0: case 1: TestForLoop(state, FORMAT, SPU, chan); break; - case 2: TestForLoop2(state, SPU, chan); break; - case 3: chan->sampcnt += chan->sampinc; break; + if (INTERPOLATE_MODE != SPUInterpolation_Lanczos) + { + switch(FORMAT) { + case 0: case 1: TestForLoop(state, FORMAT, SPU, chan); break; + case 2: TestForLoop2(state, SPU, chan); break; + case 3: chan->sampcnt += chan->sampinc; break; + } } } } @@ -807,7 +915,7 @@ static void SPU_MixAudio(NDS_state *state, bool actuallyMix, SPU_struct *SPU, in { channel_struct *chan = &SPU->channels[i]; - if (chan->status != CHANSTAT_PLAY) + if (chan->status == CHANSTAT_STOPPED) continue; SPU->bufpos = 0; diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/SPU.h b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/SPU.h index b438dbf22..6ebfdce48 100755 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/SPU.h +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/SPU.h @@ -28,6 +28,8 @@ #include #include +#include "lanczos_resampler.h" + #ifdef _MSC_VER #define FORCEINLINE __forceinline #elif defined(__GNUC__) || defined(__clang__) @@ -77,12 +79,13 @@ static FORCEINLINE s32 spumuldiv7(s32 val, u8 multiplier) { #define CHANSTAT_STOPPED 0 #define CHANSTAT_PLAY 1 +#define CHANSTAT_EMPTYBUFFER 2 enum SPUInterpolationMode { SPUInterpolation_None = 0, SPUInterpolation_Linear = 1, - SPUInterpolation_Cosine = 2 + SPUInterpolation_Lanczos = 2 }; typedef struct NDS_state NDS_state; @@ -103,10 +106,31 @@ typedef struct SoundInterface_struct extern SoundInterface_struct SNDDummy; extern SoundInterface_struct SNDFile; +static bool lanczos_initialized = false; + struct channel_struct { channel_struct() - {} + { + lanczos_resampler = 0; + } + ~channel_struct() + { + if (lanczos_resampler) + lanczos_resampler_delete(lanczos_resampler); + } + void init_lanczos() + { + if (!lanczos_resampler) + { + if (!lanczos_initialized) + { + lanczos_init(); + lanczos_initialized = true; + } + lanczos_resampler = lanczos_resampler_create(); + } + } u32 num; u8 vol; u8 datashift; @@ -136,11 +160,11 @@ struct channel_struct int loop_index; u16 x; s16 psgnoise_last; + void *lanczos_resampler; } ; -class SPU_struct +struct SPU_struct { -public: SPU_struct(NDS_state *state, int buffersize); u32 bufpos; u32 buflength; diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/arm_instructions.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/arm_instructions.c index 126ff338c..b39501a2b 100755 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/arm_instructions.c +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/arm_instructions.c @@ -3722,7 +3722,7 @@ static u32 FASTCALL OP_SMLAL_B_B(armcpu_t *cpu) LOG("SMLALBB %08X * %08X + %08X%08X = %08X%08X\r\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + (res + ((tmp<0)*0xFFFFFFFF))), (int)(u32) res); cpu->R[REG_POS(i,12)] = (u32) res; - cpu->R[REG_POS(i,16)] += (res + ((tmp<0)*0xFFFFFFFF)); + cpu->R[REG_POS(i,16)] += (u32) (res + ((tmp<0)*0xFFFFFFFF)); return 2; } @@ -3736,7 +3736,7 @@ static u32 FASTCALL OP_SMLAL_B_T(armcpu_t *cpu) LOG("SMLALBT %08X * %08X + %08X%08X = %08X%08X\r\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + res + ((tmp<0)*0xFFFFFFFF)), (int)(u32) res); cpu->R[REG_POS(i,12)] = (u32) res; - cpu->R[REG_POS(i,16)] += res + ((tmp<0)*0xFFFFFFFF); + cpu->R[REG_POS(i,16)] += (u32) (res + ((tmp<0)*0xFFFFFFFF)); return 2; } @@ -3750,7 +3750,7 @@ static u32 FASTCALL OP_SMLAL_T_B(armcpu_t *cpu) LOG("SMLALTB %08X * %08X + %08X%08X = %08X%08X\r\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + res + ((tmp<0)*0xFFFFFFFF)), (int)(u32) res); cpu->R[REG_POS(i,12)] = (u32) res; - cpu->R[REG_POS(i,16)] += res + ((tmp<0)*0xFFFFFFFF); + cpu->R[REG_POS(i,16)] += (u32) (res + ((tmp<0)*0xFFFFFFFF)); return 2; } @@ -3764,7 +3764,7 @@ static u32 FASTCALL OP_SMLAL_T_T(armcpu_t *cpu) LOG("SMLALTT %08X * %08X + %08X%08X = %08X%08X\r\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + res + ((tmp<0)*0xFFFFFFFF)), (int)(u32) res); cpu->R[REG_POS(i,12)] = (u32) res; - cpu->R[REG_POS(i,16)] += res + ((tmp<0)*0xFFFFFFFF); + cpu->R[REG_POS(i,16)] += (u32) (res + ((tmp<0)*0xFFFFFFFF)); return 2; } diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/bios.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/bios.c index 80b506c29..ed73d52d7 100755 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/bios.c +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/bios.c @@ -24,6 +24,7 @@ #include "MMU.h" #include "spu_exports.h" #include "debug.h" +#include "isqrt.h" #include "state.h" @@ -307,7 +308,7 @@ u32 devide(armcpu_t* cpu) cpu->R[0] = (u32)(num / dnum); cpu->R[1] = (u32)(num % dnum); - cpu->R[3] = (u32) (((s32)cpu->R[0])<0 ? -cpu->R[0] : cpu->R[0]); + cpu->R[3] = (u32) (((s32)cpu->R[0])<0 ? -((s32)cpu->R[0]) : cpu->R[0]); return 6; } @@ -934,7 +935,7 @@ u32 Diff16bitUnFilter(armcpu_t* cpu) u32 bios_sqrt(armcpu_t* cpu) { - cpu->R[0] = (u32)sqrt((double)(cpu->R[0])); + cpu->R[0] = isqrt32(cpu->R[0]); return 1; } diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/isqrt.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/isqrt.c new file mode 100644 index 000000000..a901a7c53 --- /dev/null +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/isqrt.c @@ -0,0 +1,41 @@ +#include "isqrt.h" + +uint32_t isqrt32(uint32_t n) { + uint32_t s, t; + +#define sqrtBit(k) \ + t = s+(1U<<(k-1)); t <<= k+1; if (n >= t) { n -= t; s |= 1U<= 1U<<30) { n -= 1U<<30; s = 1U<<15; } + sqrtBit(14); sqrtBit(13); sqrtBit(12); sqrtBit(11); sqrtBit(10); + sqrtBit(9); sqrtBit(8); sqrtBit(7); sqrtBit(6); sqrtBit(5); + sqrtBit(4); sqrtBit(3); sqrtBit(2); sqrtBit(1); + if (n > s<<1) s |= 1U; + +#undef sqrtBit + + return s; +} + +uint64_t isqrt64(uint64_t n) { + uint64_t s, t; + +#define sqrtBit(k) \ + t = s+(1ULL<<(k-1)); t <<= k+1; if (n >= t) { n -= t; s |= 1ULL<= 1ULL<<62) { n -= 1ULL<<62; s = 1ULL<<31; } + sqrtBit(30); sqrtBit(29); sqrtBit(28); sqrtBit(27); sqrtBit(26); + sqrtBit(25); sqrtBit(24); sqrtBit(23); sqrtBit(22); sqrtBit(21); + sqrtBit(20); sqrtBit(19); sqrtBit(18); sqrtBit(17); sqrtBit(16); + sqrtBit(15); + sqrtBit(14); sqrtBit(13); sqrtBit(12); sqrtBit(11); sqrtBit(10); + sqrtBit(9); sqrtBit(8); sqrtBit(7); sqrtBit(6); sqrtBit(5); + sqrtBit(4); sqrtBit(3); sqrtBit(2); sqrtBit(1); + if (n > s<<1) s |= 1ULL; + +#undef sqrtBit + + return s; +} diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/isqrt.h b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/isqrt.h new file mode 100644 index 000000000..5af9c80af --- /dev/null +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/isqrt.h @@ -0,0 +1,9 @@ +#ifndef vio2sf_isqrt_h +#define vio2sf_isqrt_h + +#include + +uint32_t isqrt32(uint32_t n); +uint64_t isqrt64(uint64_t n); + +#endif diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/lanczos_resampler.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/lanczos_resampler.c new file mode 100644 index 000000000..6ed97f34b --- /dev/null +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/lanczos_resampler.c @@ -0,0 +1,229 @@ +#include +#include +#define _USE_MATH_DEFINES +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#include "lanczos_resampler.h" + +enum { LANCZOS_RESOLUTION = 8192 }; +enum { LANCZOS_WIDTH = 8 }; +enum { LANCZOS_SAMPLES = LANCZOS_RESOLUTION * LANCZOS_WIDTH }; + +static double lanczos_lut[LANCZOS_SAMPLES + 1]; + +enum { lanczos_buffer_size = LANCZOS_WIDTH * 4 }; + +int fEqual(const double b, const double a) +{ + return fabs(a - b) < 1.0e-6; +} + +static double sinc(double x) +{ + return fEqual(x, 0.0) ? 1.0 : sin(x * M_PI) / (x * M_PI); +} + +void lanczos_init() +{ + unsigned i; + double dx = (double)(LANCZOS_WIDTH) / LANCZOS_SAMPLES, x = 0.0; + for (i = 0; i < LANCZOS_SAMPLES + 1; ++i, x += dx) + lanczos_lut[i] = fabs(x) < LANCZOS_WIDTH ? sinc(x) * sinc(x / LANCZOS_WIDTH) : 0.0; +} + +typedef struct lanczos_resampler +{ + int write_pos, write_filled; + int read_pos, read_filled; + unsigned short phase; + unsigned int phase_inc; + float buffer_in[lanczos_buffer_size * 2]; + int buffer_out[lanczos_buffer_size]; +} lanczos_resampler; + +void * lanczos_resampler_create() +{ + lanczos_resampler * r = ( lanczos_resampler * ) malloc( sizeof(lanczos_resampler) ); + if ( !r ) return 0; + + r->write_pos = 0; + r->write_filled = 0; + r->read_pos = 0; + r->read_filled = 0; + r->phase = 0; + r->phase_inc = 0; + memset( r->buffer_in, 0, sizeof(r->buffer_in) ); + memset( r->buffer_out, 0, sizeof(r->buffer_out) ); + + return r; +} + +void lanczos_resampler_delete(void * _r) +{ + free( _r ); +} + +void * lanczos_resampler_dup(void * _r) +{ + lanczos_resampler * r_in = ( lanczos_resampler * ) _r; + lanczos_resampler * r_out = ( lanczos_resampler * ) malloc( sizeof(lanczos_resampler) ); + if ( !r_out ) return 0; + + r_out->write_pos = r_in->write_pos; + r_out->write_filled = r_in->write_filled; + r_out->read_pos = r_in->read_pos; + r_out->read_filled = r_in->read_filled; + r_out->phase = r_in->phase; + r_out->phase_inc = r_in->phase_inc; + memcpy( r_out->buffer_in, r_in->buffer_in, sizeof(r_in->buffer_in) ); + memcpy( r_out->buffer_out, r_in->buffer_out, sizeof(r_in->buffer_out) ); + + return r_out; +} + +int lanczos_resampler_get_free_count(void *_r) +{ + lanczos_resampler * r = ( lanczos_resampler * ) _r; + return lanczos_buffer_size - r->write_filled; +} + +int lanczos_resampler_ready(void *_r) +{ + lanczos_resampler * r = ( lanczos_resampler * ) _r; + return r->write_filled > (LANCZOS_WIDTH * 2); +} + +void lanczos_resampler_clear(void *_r) +{ + lanczos_resampler * r = ( lanczos_resampler * ) _r; + r->write_pos = 0; + r->write_filled = 0; + r->read_pos = 0; + r->read_filled = 0; + r->phase = 0; +} + +void lanczos_resampler_set_rate(void *_r, double new_factor) +{ + lanczos_resampler * r = ( lanczos_resampler * ) _r; + r->phase_inc = (int)( new_factor * LANCZOS_RESOLUTION ); +} + +void lanczos_resampler_write_sample(void *_r, int s) +{ + lanczos_resampler * r = ( lanczos_resampler * ) _r; + + if ( r->write_filled < lanczos_buffer_size ) + { + float s32 = (float)s; + + r->buffer_in[ r->write_pos ] = s32; + r->buffer_in[ r->write_pos + lanczos_buffer_size ] = s32; + + ++r->write_filled; + + r->write_pos = ( r->write_pos + 1 ) % lanczos_buffer_size; + } +} + +static int lanczos_resampler_run(lanczos_resampler * r, int ** out_, int * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + lanczos_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= LANCZOS_WIDTH * 2; + if ( in_size > 0 ) + { + int* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + int phase = r->phase; + int phase_inc = r->phase_inc; + + int step = phase_inc > LANCZOS_RESOLUTION ? LANCZOS_RESOLUTION * LANCZOS_RESOLUTION / phase_inc : LANCZOS_RESOLUTION; + + do + { + // accumulate in extended precision + double kernel[LANCZOS_WIDTH * 2], kernel_sum = 0.0; + int i = LANCZOS_WIDTH; + int phase_adj = phase * step / LANCZOS_RESOLUTION; + double sample; + + if ( out >= out_end ) + break; + + for (; i >= -LANCZOS_WIDTH + 1; --i) + { + int pos = i * step; + kernel_sum += kernel[i + LANCZOS_WIDTH - 1] = lanczos_lut[abs(phase_adj - pos)]; + } + for (sample = 0, i = 0; i < LANCZOS_WIDTH * 2; ++i) + sample += in[i] * kernel[i]; + *out++ = (int) (sample / kernel_sum); + + phase += phase_inc; + + in += phase >> 13; + + phase &= 8191; + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = in - in_; + + r->write_filled -= used; + } + + return used; +} + +static void lanczos_resampler_fill(lanczos_resampler * r) +{ + while ( r->write_filled > (LANCZOS_WIDTH * 2) && + r->read_filled < lanczos_buffer_size ) + { + int write_pos = ( r->read_pos + r->read_filled ) % lanczos_buffer_size; + int write_size = lanczos_buffer_size - write_pos; + int * out = r->buffer_out + write_pos; + if ( write_size > ( lanczos_buffer_size - r->read_filled ) ) + write_size = lanczos_buffer_size - r->read_filled; + lanczos_resampler_run( r, &out, out + write_size ); + r->read_filled += out - r->buffer_out - write_pos; + } +} + +int lanczos_resampler_get_sample_count(void *_r) +{ + lanczos_resampler * r = ( lanczos_resampler * ) _r; + if ( r->read_filled < 1 ) + lanczos_resampler_fill( r ); + return r->read_filled; +} + +int lanczos_resampler_get_sample(void *_r) +{ + lanczos_resampler * r = ( lanczos_resampler * ) _r; + if ( r->read_filled < 1 ) + lanczos_resampler_fill( r ); + if ( r->read_filled < 1 ) + return 0; + return r->buffer_out[ r->read_pos ]; +} + +void lanczos_resampler_remove_sample(void *_r) +{ + lanczos_resampler * r = ( lanczos_resampler * ) _r; + if ( r->read_filled > 0 ) + { + --r->read_filled; + r->read_pos = ( r->read_pos + 1 ) % lanczos_buffer_size; + } +} diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/lanczos_resampler.h b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/lanczos_resampler.h new file mode 100644 index 000000000..347cee01e --- /dev/null +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/lanczos_resampler.h @@ -0,0 +1,26 @@ +#ifndef _LANCZOS_RESAMPLER_H_ +#define _LANCZOS_RESAMPLER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +void lanczos_init(); + +void * lanczos_resampler_create(); +void lanczos_resampler_delete(void *); + +int lanczos_resampler_get_free_count(void *); +void lanczos_resampler_write_sample(void *, int sample); +void lanczos_resampler_set_rate( void *, double new_factor ); +int lanczos_resampler_ready(void *); +void lanczos_resampler_clear(void *); +int lanczos_resampler_get_sample_count(void *); +int lanczos_resampler_get_sample(void *); +void lanczos_resampler_remove_sample(void *); + +#ifdef __cplusplus +}; +#endif + +#endif diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.c index 077fd301a..5db94bfb8 100644 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.c +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.c @@ -385,7 +385,7 @@ void state_loadstate(struct NDS_state *state, const u8 *ss, u32 ss_size) state->execute = TRUE; } -void state_render(struct NDS_state *state, s16 * buffer, int sample_count) +void state_render(struct NDS_state *state, s16 * buffer, unsigned int sample_count) { s16 * ptr = buffer; diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.h b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.h index cb0079731..c1f4c2bbd 100644 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.h +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.h @@ -87,7 +87,7 @@ void state_setrom(NDS_state *state, u8 * rom, u32 rom_size); void state_loadstate(NDS_state *state, const u8 * ss, u32 ss_size); -void state_render(NDS_state *state, s16 * buffer, int sample_count); +void state_render(NDS_state *state, s16 * buffer, unsigned int sample_count); #ifdef __cplusplus }; diff --git a/Plugins/HighlyComplete/HighlyComplete/HCDecoder.mm b/Plugins/HighlyComplete/HighlyComplete/HCDecoder.mm index f771b47bc..817454ee3 100644 --- a/Plugins/HighlyComplete/HighlyComplete/HCDecoder.mm +++ b/Plugins/HighlyComplete/HighlyComplete/HCDecoder.mm @@ -1011,7 +1011,7 @@ static int twosf_info(void * context, const char * name, const char * value) return NO; } - core->dwInterpolation = 1; + core->dwInterpolation = 2; core->dwChannelMute = 0; if (!state.arm7_clockdown_level)