Updated vio2sf

CQTexperiment
Chris Moeller 2013-10-13 21:38:58 -07:00
parent e6d83fc936
commit c3f90b4608
13 changed files with 498 additions and 59 deletions

View File

@ -42,6 +42,7 @@
#include "cp15.h"
//#include "wifi.h"
#include "registers.h"
#include "isqrt.h"
#if VIO2SF_GPU_ENABLE
#include "render3D.h"
@ -1359,7 +1360,7 @@ void FASTCALL MMU_write16(NDS_state *state, u32 proc, u32 adr, u16 val)
state->AUX_SPI_CMD = val & 0xFF;
}
T1WriteWord(state->MMU->MMU_MEM[proc][(REG_AUXSPIDATA >> 20) & 0xff], REG_AUXSPIDATA & 0xfff, bm_transfer(&state->MMU->bupmem, val));
T1WriteWord(state->MMU->MMU_MEM[proc][(REG_AUXSPIDATA >> 20) & 0xff], REG_AUXSPIDATA & 0xfff, bm_transfer(&state->MMU->bupmem, (u8)val));
return;
case REG_SPICNT :
@ -1410,7 +1411,7 @@ void FASTCALL MMU_write16(NDS_state *state, u32 proc, u32 adr, u16 val)
T1WriteWord(state->MMU->MMU_MEM[proc][(REG_SPIDATA >> 20) & 0xff], REG_SPIDATA & 0xfff, 0);
break;
}
T1WriteWord(state->MMU->MMU_MEM[proc][(REG_SPIDATA >> 20) & 0xff], REG_SPIDATA & 0xfff, fw_transfer(&state->MMU->fw, val));
T1WriteWord(state->MMU->MMU_MEM[proc][(REG_SPIDATA >> 20) & 0xff], REG_SPIDATA & 0xfff, fw_transfer(&state->MMU->fw, (u8)val));
return;
@ -2559,7 +2560,7 @@ void FASTCALL MMU_write32(NDS_state *state, u32 proc, u32 adr, u32 val)
case 1:
return;
}
T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4, (u32) sqrt((s64)v));
T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4, (u32) isqrt64(v));
T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B0, cnt & 0x7FFF);
SQRTLOG("BOUT1 sqrt(%08X%08X) = %08X\r\n", (u32)(v>>32), (u32)v,
T1ReadLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4));
@ -2580,7 +2581,7 @@ void FASTCALL MMU_write32(NDS_state *state, u32 proc, u32 adr, u32 val)
v = T1ReadQuad(state->MMU->MMU_MEM[proc][0x40], 0x2B8);
break;
}
T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4, (u32) sqrt((s64)v));
T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4, (u32) isqrt64(v));
T1WriteLong(state->MMU->MMU_MEM[proc][0x40], 0x2B0, cnt & 0x7FFF);
SQRTLOG("BOUT2 sqrt(%08X%08X) = %08X\r\n", (u32)(v>>32), (u32)v,
T1ReadLong(state->MMU->MMU_MEM[proc][0x40], 0x2B4));

View File

@ -268,7 +268,7 @@ void NDS_FreeROM(NDS_state *state)
void NDS_Reset( NDS_state *state)
{
BOOL oldexecute=state->execute;
int i;
unsigned int i;
u32 src;
u32 dst;
NDS_header * header = NDS_getROMHeader(state);

View File

@ -51,7 +51,7 @@
#include "state.h"
//===================CONFIGURATION========================
bool isChannelMuted(NDS_state *state, int num) { return state->dwChannelMute&(1<<num); }
bool isChannelMuted(NDS_state *state, int num) { return state->dwChannelMute&(1<<num) ? true : false; }
SPUInterpolationMode spuInterpolationMode(NDS_state *state) { return (SPUInterpolationMode)state->dwInterpolation; }
//=========================================================
@ -308,6 +308,12 @@ void SPU_struct::KeyOn(int channel)
{
channel_struct &thischan = channels[channel];
if (spuInterpolationMode(state) == SPUInterpolation_Lanczos)
{
thischan.init_lanczos();
lanczos_resampler_clear(thischan.lanczos_resampler);
}
adjust_channel_timer(&thischan);
// LOG("Channel %d key on: vol = %d, datashift = %d, hold = %d, pan = %d, waveduty = %d, repeat = %d, format = %d, source address = %07X, timer = %04X, loop start = %04X, length = %06X, cpu->state->MMUARM7_REG[0x501] = %02X\n", channel, chan->vol, chan->datashift, chan->hold, chan->pan, chan->waveduty, chan->repeat, chan->format, chan->addr, chan->timer, chan->loopstart, chan->length, T1ReadByte(MMU->ARM7_REG, 0x501));
@ -508,22 +514,11 @@ extern "C" void SPU_WriteLong(NDS_state *state, u32 addr, u32 val)
}
//////////////////////////////////////////////////////////////////////////////
static FORCEINLINE s32 Interpolate(SPUInterpolationMode INTERPOLATE_MODE, s32 a, s32 b, double ratio)
static FORCEINLINE s32 Interpolate(s32 a, s32 b, double ratio)
{
if(INTERPOLATE_MODE == SPUInterpolation_Cosine)
{
//why did we change it away from the lookup table? somebody should research that
ratio = ratio - sputrunc(ratio);
double ratio2 = ((1.0 - cos(ratio * M_PI)) * 0.5);
//double ratio2 = (1.0f - cos_lut[((int)(ratio*256.0))&0xFF]) / 2.0f;
return s32floor((float)(((1-ratio2)*a) + (ratio2*b)));
}
else
{
//linear interpolation
ratio = ratio - sputrunc(ratio);
return s32floor((float)((1-ratio)*a + ratio*b));
}
//linear interpolation
ratio = ratio - sputrunc(ratio);
return s32floor((float)((1-ratio)*a + ratio*b));
}
//////////////////////////////////////////////////////////////////////////////
@ -532,15 +527,15 @@ double round(double r)
return (r > 0.0) ? floor(r + 0.5) : ceil(r - 0.5);
}
static FORCEINLINE void Fetch8BitData(SPUInterpolationMode INTERPOLATE_MODE, channel_struct *chan, s32 *data)
static FORCEINLINE void Fetch8BitDataInternal(SPUInterpolationMode INTERPOLATE_MODE, channel_struct *chan, s32 *data)
{
u32 loc = sputrunc(chan->sampcnt);
if(INTERPOLATE_MODE != SPUInterpolation_None)
if(INTERPOLATE_MODE == SPUInterpolation_Linear)
{
s32 a = (s32)(chan->buf8[loc] << 8);
if(loc < (chan->totlength << 2) - 1) {
s32 b = (s32)(chan->buf8[loc + 1] << 8);
a = Interpolate(INTERPOLATE_MODE, a, b, chan->sampcnt);
a = Interpolate(a, b, chan->sampcnt);
}
*data = a;
}
@ -548,18 +543,18 @@ static FORCEINLINE void Fetch8BitData(SPUInterpolationMode INTERPOLATE_MODE, cha
*data = (s32)chan->buf8[loc] << 8;
}
static FORCEINLINE void Fetch16BitData(SPUInterpolationMode INTERPOLATE_MODE, const channel_struct * const chan, s32 *data)
static FORCEINLINE void Fetch16BitDataInternal(SPUInterpolationMode INTERPOLATE_MODE, const channel_struct * const chan, s32 *data)
{
const s16* const buf16 = chan->buf16;
const int shift = 1;
if(INTERPOLATE_MODE != SPUInterpolation_None)
if(INTERPOLATE_MODE == SPUInterpolation_Linear)
{
u32 loc = sputrunc(chan->sampcnt);
s32 a = (s32)buf16[loc], b;
if(loc < (chan->totlength << shift) - 1)
{
b = (s32)buf16[loc + 1];
a = Interpolate(INTERPOLATE_MODE,a, b, chan->sampcnt);
a = Interpolate(a, b, chan->sampcnt);
}
*data = a;
}
@ -567,7 +562,7 @@ static FORCEINLINE void Fetch16BitData(SPUInterpolationMode INTERPOLATE_MODE, co
*data = (s32)buf16[sputrunc(chan->sampcnt)];
}
static FORCEINLINE void FetchADPCMData(SPUInterpolationMode INTERPOLATE_MODE, channel_struct * const chan, s32 * const data)
static FORCEINLINE void FetchADPCMDataInternal(SPUInterpolationMode INTERPOLATE_MODE, channel_struct * const chan, s32 * const data)
{
// No sense decoding, just return the last sample
if (chan->lastsampcnt != sputrunc(chan->sampcnt)){
@ -594,8 +589,8 @@ static FORCEINLINE void FetchADPCMData(SPUInterpolationMode INTERPOLATE_MODE, ch
chan->lastsampcnt = sputrunc(chan->sampcnt);
}
if(INTERPOLATE_MODE != SPUInterpolation_None)
*data = Interpolate(INTERPOLATE_MODE,(s32)chan->pcm16b_last,(s32)chan->pcm16b,chan->sampcnt);
if(INTERPOLATE_MODE == SPUInterpolation_Linear)
*data = Interpolate((s32)chan->pcm16b_last,(s32)chan->pcm16b,chan->sampcnt);
else
*data = (s32)chan->pcm16b;
}
@ -679,11 +674,18 @@ static FORCEINLINE void TestForLoop(NDS_state *state, int FORMAT, SPU_struct *SP
}
else
{
chan->status = CHANSTAT_STOPPED;
if (!chan->lanczos_resampler || !lanczos_resampler_get_sample_count(chan->lanczos_resampler))
{
chan->status = CHANSTAT_STOPPED;
if(SPU == state->SPU_core)
state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F;
SPU->bufpos = SPU->buflength;
if(SPU == state->SPU_core)
state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F;
SPU->bufpos = SPU->buflength;
}
else
{
chan->status = CHANSTAT_EMPTYBUFFER;
}
}
}
}
@ -715,14 +717,117 @@ static FORCEINLINE void TestForLoop2(NDS_state *state, SPU_struct *SPU, channel_
}
else
{
chan->status = CHANSTAT_STOPPED;
if(SPU == state->SPU_core)
state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F;
SPU->bufpos = SPU->buflength;
if (!chan->lanczos_resampler || !lanczos_resampler_get_sample_count(chan->lanczos_resampler))
{
chan->status = CHANSTAT_STOPPED;
if(SPU == state->SPU_core)
state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F;
SPU->bufpos = SPU->buflength;
}
else
{
chan->status = CHANSTAT_EMPTYBUFFER;
}
}
}
}
// Produces one output sample for an 8-bit PCM channel.
//
// For every mode except SPUInterpolation_Lanczos this simply forwards to
// Fetch8BitDataInternal. In Lanczos mode the channel's source data is pushed
// through chan->lanczos_resampler and one resampled value is popped into *data.
//
// NOTE(review): chan->lanczos_resampler is dereferenced unconditionally on the
// Lanczos path; this relies on it having been created beforehand (KeyOn calls
// init_lanczos() in Lanczos mode) -- confirm no other path reaches here first.
static FORCEINLINE void Fetch8BitData(SPUInterpolationMode INTERPOLATE_MODE, NDS_state *state, SPU_struct* const SPU, channel_struct *chan, s32 *data)
{
if (INTERPOLATE_MODE != SPUInterpolation_Lanczos)
return Fetch8BitDataInternal(INTERPOLATE_MODE, chan, data);
// The channel increment is temporarily forced to 1.0 while feeding the
// resampler; the real ratio is handed to the resampler instead.
double saved_inc = chan->sampinc;
chan->sampinc = 1.0;
lanczos_resampler_set_rate( chan->lanczos_resampler, saved_inc );
// Top up the resampler input until it is full or the channel reports
// CHANSTAT_EMPTYBUFFER.
// NOTE(review): the loop does not stop on CHANSTAT_STOPPED; if TestForLoop
// can set STOPPED here, fetching would continue -- verify against TestForLoop.
while (chan->status != CHANSTAT_EMPTYBUFFER && lanczos_resampler_get_free_count(chan->lanczos_resampler))
{
s32 sample;
// Raw (non-interpolated) source sample at the current position.
Fetch8BitDataInternal(SPUInterpolation_None, chan, &sample);
// Format 0 = 8-bit PCM; presumably advances the position and handles
// loop/end -- TODO confirm, TestForLoop is only partly visible here.
TestForLoop(state, 0, SPU, chan);
lanczos_resampler_write_sample(chan->lanczos_resampler, sample);
}
chan->sampinc = saved_inc;
// Nothing left to output: stop the channel, clear bit 7 of the channel's
// register byte at 0x403 + index * 0x10 (core SPU only) and end mixing for
// this buffer by moving bufpos to buflength.
if (!lanczos_resampler_get_sample_count(chan->lanczos_resampler))
{
chan->status = CHANSTAT_STOPPED;
if(SPU == state->SPU_core)
state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F;
SPU->bufpos = SPU->buflength;
}
// get_sample yields 0 when the resampler is empty, so *data is always written.
*data = lanczos_resampler_get_sample(chan->lanczos_resampler);
lanczos_resampler_remove_sample(chan->lanczos_resampler);
}
// Produces one output sample for a 16-bit PCM channel.
//
// For every mode except SPUInterpolation_Lanczos this simply forwards to
// Fetch16BitDataInternal. In Lanczos mode the channel's source data is pushed
// through chan->lanczos_resampler and one resampled value is popped into *data.
//
// NOTE(review): chan->lanczos_resampler is dereferenced unconditionally on the
// Lanczos path; this relies on it having been created beforehand (KeyOn calls
// init_lanczos() in Lanczos mode) -- confirm no other path reaches here first.
static FORCEINLINE void Fetch16BitData(SPUInterpolationMode INTERPOLATE_MODE, NDS_state *state, SPU_struct* const SPU, channel_struct *chan, s32 *data)
{
if (INTERPOLATE_MODE != SPUInterpolation_Lanczos)
return Fetch16BitDataInternal(INTERPOLATE_MODE, chan, data);
// The channel increment is temporarily forced to 1.0 while feeding the
// resampler; the real ratio is handed to the resampler instead.
double saved_inc = chan->sampinc;
chan->sampinc = 1.0;
lanczos_resampler_set_rate( chan->lanczos_resampler, saved_inc );
// Top up the resampler input until it is full or the channel reports
// CHANSTAT_EMPTYBUFFER.
// NOTE(review): the loop does not stop on CHANSTAT_STOPPED; if TestForLoop
// can set STOPPED here, fetching would continue -- verify against TestForLoop.
while (chan->status != CHANSTAT_EMPTYBUFFER && lanczos_resampler_get_free_count(chan->lanczos_resampler))
{
s32 sample;
// Raw (non-interpolated) source sample at the current position.
Fetch16BitDataInternal(SPUInterpolation_None, chan, &sample);
// Format 1 = 16-bit PCM; presumably advances the position and handles
// loop/end -- TODO confirm, TestForLoop is only partly visible here.
TestForLoop(state, 1, SPU, chan);
lanczos_resampler_write_sample(chan->lanczos_resampler, sample);
}
chan->sampinc = saved_inc;
// Nothing left to output: stop the channel, clear bit 7 of the channel's
// register byte at 0x403 + index * 0x10 (core SPU only) and end mixing for
// this buffer by moving bufpos to buflength.
if (!lanczos_resampler_get_sample_count(chan->lanczos_resampler))
{
chan->status = CHANSTAT_STOPPED;
if(SPU == state->SPU_core)
state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F;
SPU->bufpos = SPU->buflength;
}
// get_sample yields 0 when the resampler is empty, so *data is always written.
*data = lanczos_resampler_get_sample(chan->lanczos_resampler);
lanczos_resampler_remove_sample(chan->lanczos_resampler);
}
// Produces one output sample for an ADPCM channel.
//
// For every mode except SPUInterpolation_Lanczos this simply forwards to
// FetchADPCMDataInternal. In Lanczos mode the decoded samples are pushed
// through chan->lanczos_resampler and one resampled value is popped into *data.
//
// NOTE(review): chan->lanczos_resampler is dereferenced unconditionally on the
// Lanczos path; this relies on it having been created beforehand (KeyOn calls
// init_lanczos() in Lanczos mode) -- confirm no other path reaches here first.
static FORCEINLINE void FetchADPCMData(SPUInterpolationMode INTERPOLATE_MODE, NDS_state *state, SPU_struct* const SPU, channel_struct *chan, s32 *data)
{
if (INTERPOLATE_MODE != SPUInterpolation_Lanczos)
return FetchADPCMDataInternal(INTERPOLATE_MODE, chan, data);
// The channel increment is temporarily forced to 1.0 while feeding the
// resampler; the real ratio is handed to the resampler instead.
double saved_inc = chan->sampinc;
chan->sampinc = 1.0;
lanczos_resampler_set_rate( chan->lanczos_resampler, saved_inc );
// Top up the resampler input until it is full or the channel reports
// CHANSTAT_EMPTYBUFFER.
// NOTE(review): the loop does not stop on CHANSTAT_STOPPED; if TestForLoop2
// can set STOPPED here, decoding would continue -- verify against TestForLoop2.
while (chan->status != CHANSTAT_EMPTYBUFFER && lanczos_resampler_get_free_count(chan->lanczos_resampler))
{
s32 sample;
// Decoded (non-interpolated) sample at the current position.
FetchADPCMDataInternal(SPUInterpolation_None, chan, &sample);
// ADPCM variant of the loop/end test; presumably also advances the
// position -- TODO confirm, TestForLoop2 is only partly visible here.
TestForLoop2(state, SPU, chan);
lanczos_resampler_write_sample(chan->lanczos_resampler, sample);
}
chan->sampinc = saved_inc;
// Nothing left to output: stop the channel, clear bit 7 of the channel's
// register byte at 0x403 + index * 0x10 (core SPU only) and end mixing for
// this buffer by moving bufpos to buflength.
if (!lanczos_resampler_get_sample_count(chan->lanczos_resampler))
{
chan->status = CHANSTAT_STOPPED;
if(SPU == state->SPU_core)
state->MMU->ARM7_REG[0x403 + (((chan-SPU->channels) ) * 0x10)] &= 0x7F;
SPU->bufpos = SPU->buflength;
}
// get_sample yields 0 when the resampler is empty, so *data is always written.
*data = lanczos_resampler_get_sample(chan->lanczos_resampler);
lanczos_resampler_remove_sample(chan->lanczos_resampler);
}
FORCEINLINE static void SPU_Mix(int CHANNELS, SPU_struct* SPU, channel_struct *chan, s32 data)
{
switch(CHANNELS)
@ -742,18 +847,21 @@ FORCEINLINE static void ____SPU_ChanUpdate(NDS_state *state, int CHANNELS, int F
s32 data;
switch(FORMAT)
{
case 0: Fetch8BitData(INTERPOLATE_MODE, chan, &data); break;
case 1: Fetch16BitData(INTERPOLATE_MODE, chan, &data); break;
case 2: FetchADPCMData(INTERPOLATE_MODE, chan, &data); break;
case 0: Fetch8BitData(INTERPOLATE_MODE, state, SPU, chan, &data); break;
case 1: Fetch16BitData(INTERPOLATE_MODE, state, SPU, chan, &data); break;
case 2: FetchADPCMData(INTERPOLATE_MODE, state, SPU, chan, &data); break;
case 3: FetchPSGData(chan, &data); break;
}
SPU_Mix(CHANNELS, SPU, chan, data);
}
switch(FORMAT) {
case 0: case 1: TestForLoop(state, FORMAT, SPU, chan); break;
case 2: TestForLoop2(state, SPU, chan); break;
case 3: chan->sampcnt += chan->sampinc; break;
if (INTERPOLATE_MODE != SPUInterpolation_Lanczos)
{
switch(FORMAT) {
case 0: case 1: TestForLoop(state, FORMAT, SPU, chan); break;
case 2: TestForLoop2(state, SPU, chan); break;
case 3: chan->sampcnt += chan->sampinc; break;
}
}
}
}
@ -807,7 +915,7 @@ static void SPU_MixAudio(NDS_state *state, bool actuallyMix, SPU_struct *SPU, in
{
channel_struct *chan = &SPU->channels[i];
if (chan->status != CHANSTAT_PLAY)
if (chan->status == CHANSTAT_STOPPED)
continue;
SPU->bufpos = 0;

View File

@ -28,6 +28,8 @@
#include <math.h>
#include <assert.h>
#include "lanczos_resampler.h"
#ifdef _MSC_VER
#define FORCEINLINE __forceinline
#elif defined(__GNUC__) || defined(__clang__)
@ -77,12 +79,13 @@ static FORCEINLINE s32 spumuldiv7(s32 val, u8 multiplier) {
#define CHANSTAT_STOPPED 0
#define CHANSTAT_PLAY 1
#define CHANSTAT_EMPTYBUFFER 2
enum SPUInterpolationMode
{
SPUInterpolation_None = 0,
SPUInterpolation_Linear = 1,
SPUInterpolation_Cosine = 2
SPUInterpolation_Lanczos = 2
};
typedef struct NDS_state NDS_state;
@ -103,10 +106,31 @@ typedef struct SoundInterface_struct
extern SoundInterface_struct SNDDummy;
extern SoundInterface_struct SNDFile;
static bool lanczos_initialized = false;
struct channel_struct
{
channel_struct()
{}
{
lanczos_resampler = 0;
}
~channel_struct()
{
if (lanczos_resampler)
lanczos_resampler_delete(lanczos_resampler);
}
void init_lanczos()
{
if (!lanczos_resampler)
{
if (!lanczos_initialized)
{
lanczos_init();
lanczos_initialized = true;
}
lanczos_resampler = lanczos_resampler_create();
}
}
u32 num;
u8 vol;
u8 datashift;
@ -136,11 +160,11 @@ struct channel_struct
int loop_index;
u16 x;
s16 psgnoise_last;
void *lanczos_resampler;
} ;
class SPU_struct
struct SPU_struct
{
public:
SPU_struct(NDS_state *state, int buffersize);
u32 bufpos;
u32 buflength;

View File

@ -3722,7 +3722,7 @@ static u32 FASTCALL OP_SMLAL_B_B(armcpu_t *cpu)
LOG("SMLALBB %08X * %08X + %08X%08X = %08X%08X\r\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + (res + ((tmp<0)*0xFFFFFFFF))), (int)(u32) res);
cpu->R[REG_POS(i,12)] = (u32) res;
cpu->R[REG_POS(i,16)] += (res + ((tmp<0)*0xFFFFFFFF));
cpu->R[REG_POS(i,16)] += (u32) (res + ((tmp<0)*0xFFFFFFFF));
return 2;
}
@ -3736,7 +3736,7 @@ static u32 FASTCALL OP_SMLAL_B_T(armcpu_t *cpu)
LOG("SMLALBT %08X * %08X + %08X%08X = %08X%08X\r\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + res + ((tmp<0)*0xFFFFFFFF)), (int)(u32) res);
cpu->R[REG_POS(i,12)] = (u32) res;
cpu->R[REG_POS(i,16)] += res + ((tmp<0)*0xFFFFFFFF);
cpu->R[REG_POS(i,16)] += (u32) (res + ((tmp<0)*0xFFFFFFFF));
return 2;
}
@ -3750,7 +3750,7 @@ static u32 FASTCALL OP_SMLAL_T_B(armcpu_t *cpu)
LOG("SMLALTB %08X * %08X + %08X%08X = %08X%08X\r\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + res + ((tmp<0)*0xFFFFFFFF)), (int)(u32) res);
cpu->R[REG_POS(i,12)] = (u32) res;
cpu->R[REG_POS(i,16)] += res + ((tmp<0)*0xFFFFFFFF);
cpu->R[REG_POS(i,16)] += (u32) (res + ((tmp<0)*0xFFFFFFFF));
return 2;
}
@ -3764,7 +3764,7 @@ static u32 FASTCALL OP_SMLAL_T_T(armcpu_t *cpu)
LOG("SMLALTT %08X * %08X + %08X%08X = %08X%08X\r\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + res + ((tmp<0)*0xFFFFFFFF)), (int)(u32) res);
cpu->R[REG_POS(i,12)] = (u32) res;
cpu->R[REG_POS(i,16)] += res + ((tmp<0)*0xFFFFFFFF);
cpu->R[REG_POS(i,16)] += (u32) (res + ((tmp<0)*0xFFFFFFFF));
return 2;
}

View File

@ -24,6 +24,7 @@
#include "MMU.h"
#include "spu_exports.h"
#include "debug.h"
#include "isqrt.h"
#include "state.h"
@ -307,7 +308,7 @@ u32 devide(armcpu_t* cpu)
cpu->R[0] = (u32)(num / dnum);
cpu->R[1] = (u32)(num % dnum);
cpu->R[3] = (u32) (((s32)cpu->R[0])<0 ? -cpu->R[0] : cpu->R[0]);
cpu->R[3] = (u32) (((s32)cpu->R[0])<0 ? -((s32)cpu->R[0]) : cpu->R[0]);
return 6;
}
@ -934,7 +935,7 @@ u32 Diff16bitUnFilter(armcpu_t* cpu)
u32 bios_sqrt(armcpu_t* cpu)
{
cpu->R[0] = (u32)sqrt((double)(cpu->R[0]));
cpu->R[0] = isqrt32(cpu->R[0]);
return 1;
}

View File

@ -0,0 +1,41 @@
#include "isqrt.h"
/*
 * Integer square root: returns floor(sqrt(n)) for any 32-bit unsigned n.
 *
 * Classic bit-by-bit method.  `root` holds the result bits decided so far
 * and `n` is reduced to the remainder (original n minus root squared).
 * Setting result bit b is possible exactly when the remainder covers
 *     (root + 2^b)^2 - root^2  ==  (2*root + 2^b) << b
 * The largest trial value is below 2^31, so nothing overflows.
 */
uint32_t isqrt32(uint32_t n) {
    uint32_t root = 0U;
    int bit;

    for (bit = 15; bit >= 0; --bit) {
        const uint32_t trial = ((root << 1) + (1U << bit)) << bit;

        if (n >= trial) {
            n -= trial;
            root |= 1U << bit;
        }
    }
    return root;
}
/*
 * Integer square root: returns floor(sqrt(n)) for any 64-bit unsigned n.
 *
 * Same bit-by-bit scheme as the 32-bit variant: `root` accumulates result
 * bits from bit 31 downwards while `n` carries the remainder.  Bit b is
 * accepted when the remainder covers
 *     (root + 2^b)^2 - root^2  ==  (2*root + 2^b) << b
 * The largest trial value is below 2^63, so nothing overflows.
 */
uint64_t isqrt64(uint64_t n) {
    uint64_t root = 0ULL;
    int bit;

    for (bit = 31; bit >= 0; --bit) {
        const uint64_t trial = ((root << 1) + (1ULL << bit)) << bit;

        if (n >= trial) {
            n -= trial;
            root |= 1ULL << bit;
        }
    }
    return root;
}

View File

@ -0,0 +1,9 @@
/* Integer square-root helpers (no floating point involved). */
#ifndef vio2sf_isqrt_h
#define vio2sf_isqrt_h
#include <stdint.h>
/* Returns the largest s such that s * s <= n (i.e. floor(sqrt(n))). */
uint32_t isqrt32(uint32_t n);
/* 64-bit counterpart of isqrt32; the result always fits in 32 bits. */
uint64_t isqrt64(uint64_t n);
#endif

View File

@ -0,0 +1,229 @@
#include <stdlib.h>
#include <string.h>
#define _USE_MATH_DEFINES
#include <math.h>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#include "lanczos_resampler.h"
/* Number of table entries (and phase steps) per unit of kernel distance. */
enum { LANCZOS_RESOLUTION = 8192 };
/* Kernel half-width in input samples; 2 * LANCZOS_WIDTH taps are summed. */
enum { LANCZOS_WIDTH = 8 };
/* Table entries needed to cover the distance range [0, LANCZOS_WIDTH). */
enum { LANCZOS_SAMPLES = LANCZOS_RESOLUTION * LANCZOS_WIDTH };
/* Kernel lookup table, filled by lanczos_init(); the extra entry holds the
 * value at distance == LANCZOS_WIDTH, which lanczos_init() sets to 0. */
static double lanczos_lut[LANCZOS_SAMPLES + 1];
/* Capacity (in samples) of the resampler's input and output rings. */
enum { lanczos_buffer_size = LANCZOS_WIDTH * 4 };
/*
 * Approximate floating-point equality.
 * Returns 1 when the two values differ by strictly less than 1.0e-6,
 * otherwise 0.
 */
int fEqual(const double b, const double a)
{
    const double tolerance = 1.0e-6;
    const double distance = fabs(a - b);

    if (distance < tolerance)
        return 1;
    return 0;
}
/*
 * Normalized sinc: sin(pi * x) / (pi * x).
 * Arguments that fEqual() treats as zero return the limit value 1.0, which
 * also avoids dividing by zero.
 */
static double sinc(double x)
{
    if (fEqual(x, 0.0))
    {
        return 1.0;
    }
    return sin(x * M_PI) / (x * M_PI);
}
/*
 * Fills lanczos_lut with the kernel sinc(x) * sinc(x / LANCZOS_WIDTH),
 * sampled every 1 / LANCZOS_RESOLUTION from x = 0 up to and including
 * x = LANCZOS_WIDTH.  Entries at or beyond the kernel width are 0.
 * Must run before any resampler produces output.
 */
void lanczos_init()
{
    const double dx = (double)(LANCZOS_WIDTH) / LANCZOS_SAMPLES;
    double x = 0.0;
    unsigned idx = 0;

    while (idx < LANCZOS_SAMPLES + 1)
    {
        double tap = 0.0;

        if (fabs(x) < LANCZOS_WIDTH)
            tap = sinc(x) * sinc(x / LANCZOS_WIDTH);

        lanczos_lut[idx] = tap;
        ++idx;
        x += dx;
    }
}
/*
 * Resampler state: an input ring of raw samples, an output ring of resampled
 * values, and the fractional read position between input samples.
 */
typedef struct lanczos_resampler
{
/* Input ring: next slot to write, and number of samples not yet consumed. */
int write_pos, write_filled;
/* Output ring: oldest unread slot, and number of values waiting to be read. */
int read_pos, read_filled;
/* Fractional input position in 1/LANCZOS_RESOLUTION units (kept below
 * LANCZOS_RESOLUTION by lanczos_resampler_run). */
unsigned short phase;
/* Phase advance per output sample: rate factor * LANCZOS_RESOLUTION. */
unsigned int phase_inc;
/* Every input sample is stored twice, lanczos_buffer_size apart, so a
 * window that wraps around the ring can still be read contiguously. */
float buffer_in[lanczos_buffer_size * 2];
/* Resampled values, addressed modulo lanczos_buffer_size. */
int buffer_out[lanczos_buffer_size];
} lanczos_resampler;
/*
 * Allocates a resampler with every field and both buffers cleared to zero.
 * Returns the new instance, or a null pointer when allocation fails.
 * Release it with lanczos_resampler_delete().
 */
void * lanczos_resampler_create()
{
    /* calloc hands back zeroed storage, which is the initial state:
     * empty rings, phase 0, rate 0 and silent buffers. */
    lanczos_resampler * fresh = ( lanczos_resampler * ) calloc( 1, sizeof(*fresh) );

    return fresh;
}
/*
 * Releases a resampler obtained from lanczos_resampler_create() or
 * lanczos_resampler_dup().  The pointer is passed straight to free(), so a
 * null pointer is accepted.
 */
void lanczos_resampler_delete(void * _r)
{
free( _r );
}
/*
 * Creates an independent copy of an existing resampler, including its
 * buffered input and output and its current phase and rate.
 * Returns the copy, or a null pointer when allocation fails.
 * _r must point to a valid resampler.
 */
void * lanczos_resampler_dup(void * _r)
{
    const lanczos_resampler * source = ( const lanczos_resampler * ) _r;
    lanczos_resampler * clone = ( lanczos_resampler * ) malloc( sizeof(*clone) );

    if ( clone )
    {
        /* Struct assignment copies every member, arrays included. */
        *clone = *source;
    }
    return clone;
}
/*
 * Returns how many more input samples can be written before the input ring
 * reaches lanczos_buffer_size unconsumed samples.
 */
int lanczos_resampler_get_free_count(void *_r)
{
    const lanczos_resampler * state = ( const lanczos_resampler * ) _r;
    const int pending = state->write_filled;

    return lanczos_buffer_size - pending;
}
/*
 * Returns non-zero when more than 2 * LANCZOS_WIDTH input samples are
 * buffered, i.e. enough for the kernel to produce at least one output.
 */
int lanczos_resampler_ready(void *_r)
{
    const lanczos_resampler * state = ( const lanczos_resampler * ) _r;
    const int needed = LANCZOS_WIDTH * 2;

    return needed < state->write_filled;
}
/*
 * Discards all buffered input and output and resets the phase to 0.
 * The configured rate (phase_inc) and the buffer contents are left as-is.
 */
void lanczos_resampler_clear(void *_r)
{
    lanczos_resampler * state = ( lanczos_resampler * ) _r;

    state->read_filled = state->read_pos = 0;
    state->write_filled = state->write_pos = 0;
    state->phase = 0;
}
/*
 * Sets the resampling ratio: new_factor is the number of input samples
 * consumed per output sample.  It is stored as a fixed-point step of
 * new_factor * LANCZOS_RESOLUTION, truncated towards zero.
 */
void lanczos_resampler_set_rate(void *_r, double new_factor)
{
    lanczos_resampler * state = ( lanczos_resampler * ) _r;
    const int fixed_step = (int)( new_factor * LANCZOS_RESOLUTION );

    state->phase_inc = fixed_step;
}
/*
 * Appends one input sample to the resampler.
 * The sample is silently dropped when the input ring already holds
 * lanczos_buffer_size unconsumed samples.
 */
void lanczos_resampler_write_sample(void *_r, int s)
{
    lanczos_resampler * state = ( lanczos_resampler * ) _r;
    float value;
    int slot;

    if ( state->write_filled >= lanczos_buffer_size )
        return;

    value = (float)s;
    slot = state->write_pos;

    /* Store the sample in both halves so readers never have to wrap. */
    state->buffer_in[ slot ] = value;
    state->buffer_in[ slot + lanczos_buffer_size ] = value;

    state->write_filled += 1;
    state->write_pos = ( slot + 1 ) % lanczos_buffer_size;
}
/*
 * Core resampling loop: convolves buffered input with the Lanczos kernel and
 * writes results to [*out_, out_end).
 *
 * r        resampler state; phase and write_filled are updated.
 * out_     in: first output slot to fill; out: one past the last slot written
 *          (left untouched when there is too little input).
 * out_end  one past the last slot that may be written.
 *
 * Returns the number of input samples consumed (0 when no more than
 * 2 * LANCZOS_WIDTH samples are buffered).
 */
static int lanczos_resampler_run(lanczos_resampler * r, int ** out_, int * out_end)
{
int in_size = r->write_filled;
/* Oldest unconsumed sample; addressing relative to the second (mirrored)
 * half keeps the whole window contiguous even when the ring has wrapped. */
float const* in_ = r->buffer_in + lanczos_buffer_size + r->write_pos - r->write_filled;
int used = 0;
/* Each output needs a full window of 2 * LANCZOS_WIDTH taps, so only the
 * samples beyond that count can serve as window start positions. */
in_size -= LANCZOS_WIDTH * 2;
if ( in_size > 0 )
{
int* out = *out_;
float const* in = in_;
float const* const in_end = in + in_size;
int phase = r->phase;
int phase_inc = r->phase_inc;
/* Table stride between adjacent taps. When more than one input sample is
 * consumed per output (phase_inc > LANCZOS_RESOLUTION) the stride shrinks in
 * proportion, stretching the kernel over the taps -- presumably to low-pass
 * while downsampling; confirm intent. */
int step = phase_inc > LANCZOS_RESOLUTION ? LANCZOS_RESOLUTION * LANCZOS_RESOLUTION / phase_inc : LANCZOS_RESOLUTION;
do
{
// accumulate in extended precision
double kernel[LANCZOS_WIDTH * 2], kernel_sum = 0.0;
int i = LANCZOS_WIDTH;
/* Fractional position rescaled to the (possibly reduced) table stride. */
int phase_adj = phase * step / LANCZOS_RESOLUTION;
double sample;
/* Output window full: stop before computing another value. */
if ( out >= out_end )
break;
/* Build the tap weights: i runs LANCZOS_WIDTH .. -LANCZOS_WIDTH + 1 and
 * fills kernel[2 * LANCZOS_WIDTH - 1] down to kernel[0]; the table index is
 * the absolute distance between the tap and the fractional position. */
for (; i >= -LANCZOS_WIDTH + 1; --i)
{
int pos = i * step;
kernel_sum += kernel[i + LANCZOS_WIDTH - 1] = lanczos_lut[abs(phase_adj - pos)];
}
for (sample = 0, i = 0; i < LANCZOS_WIDTH * 2; ++i)
sample += in[i] * kernel[i];
/* Dividing by the weight sum normalizes the kernel gain to 1. */
*out++ = (int) (sample / kernel_sum);
/* Advance the fixed-point position: 13 == log2(LANCZOS_RESOLUTION) and
 * 8191 == LANCZOS_RESOLUTION - 1, so the integer part moves the input
 * pointer and the fractional part stays in phase. */
phase += phase_inc;
in += phase >> 13;
phase &= 8191;
}
while ( in < in_end );
r->phase = phase;
*out_ = out;
/* NOTE(review): with a large phase_inc `in` can step past in_end, making
 * `used` exceed the in_size computed above -- confirm callers tolerate
 * write_filled dropping below 2 * LANCZOS_WIDTH (or below zero). */
used = in - in_;
r->write_filled -= used;
}
return used;
}
/*
 * Converts buffered input into output for as long as there is enough input
 * (more than 2 * LANCZOS_WIDTH samples) and room left in the output ring.
 * Each pass fills one contiguous stretch of the output ring.
 */
static void lanczos_resampler_fill(lanczos_resampler * r)
{
    for (;;)
    {
        int tail, until_wrap, unused;
        int * first;
        int * cursor;

        if ( r->write_filled <= (LANCZOS_WIDTH * 2) )
            break;
        if ( r->read_filled >= lanczos_buffer_size )
            break;

        /* First free output slot, and how far we may write from there:
         * limited both by the end of the array and by the free space. */
        tail = ( r->read_pos + r->read_filled ) % lanczos_buffer_size;
        until_wrap = lanczos_buffer_size - tail;
        unused = lanczos_buffer_size - r->read_filled;

        first = r->buffer_out + tail;
        cursor = first;

        lanczos_resampler_run( r, &cursor, first + ( until_wrap > unused ? unused : until_wrap ) );

        /* cursor now sits one past the last value produced. */
        r->read_filled += cursor - first;
    }
}
/*
 * Returns the number of output samples ready to be read.  When none are
 * queued it first tries to produce some from the buffered input.
 */
int lanczos_resampler_get_sample_count(void *_r)
{
    lanczos_resampler * state = ( lanczos_resampler * ) _r;
    const int starved = state->read_filled < 1;

    if ( starved )
    {
        lanczos_resampler_fill( state );
    }
    return state->read_filled;
}
/*
 * Returns the oldest output sample without removing it.  When the output
 * ring is empty it first tries to produce more; if it is still empty the
 * result is 0.
 */
int lanczos_resampler_get_sample(void *_r)
{
    lanczos_resampler * state = ( lanczos_resampler * ) _r;

    if ( state->read_filled < 1 )
    {
        lanczos_resampler_fill( state );
        if ( state->read_filled < 1 )
            return 0;
    }
    return state->buffer_out[ state->read_pos ];
}
/*
 * Drops the oldest output sample, if there is one, and advances the read
 * position around the output ring.  Does nothing when the ring is empty.
 */
void lanczos_resampler_remove_sample(void *_r)
{
    lanczos_resampler * state = ( lanczos_resampler * ) _r;

    if ( state->read_filled <= 0 )
        return;

    state->read_filled -= 1;
    state->read_pos = ( state->read_pos + 1 ) % lanczos_buffer_size;
}

View File

@ -0,0 +1,26 @@
/*
 * Streaming Lanczos resampler: write input samples, read resampled output.
 * Instances are passed around as opaque void pointers.
 *
 * NOTE(review): the implementation also defines lanczos_resampler_dup(),
 * which is not declared here -- confirm whether that is intentional.
 */
#ifndef _LANCZOS_RESAMPLER_H_
#define _LANCZOS_RESAMPLER_H_
#ifdef __cplusplus
extern "C" {
#endif
/* Builds the shared kernel lookup table; call before producing any output. */
void lanczos_init();
/* Allocates a zeroed resampler; returns a null pointer on allocation failure. */
void * lanczos_resampler_create();
/* Frees a resampler returned by lanczos_resampler_create(). */
void lanczos_resampler_delete(void *);
/* Number of input samples that can still be written before the input is full. */
int lanczos_resampler_get_free_count(void *);
/* Appends one input sample; ignored when the input buffer is already full. */
void lanczos_resampler_write_sample(void *, int sample);
/* Sets how many input samples are consumed per output sample. */
void lanczos_resampler_set_rate( void *, double new_factor );
/* Non-zero when enough input is buffered to compute at least one output. */
int lanczos_resampler_ready(void *);
/* Drops all buffered input/output and resets the phase; the rate is kept. */
void lanczos_resampler_clear(void *);
/* Number of output samples available (resampling more if none are queued). */
int lanczos_resampler_get_sample_count(void *);
/* Peeks at the oldest output sample; returns 0 when none can be produced. */
int lanczos_resampler_get_sample(void *);
/* Discards the oldest output sample, if any. */
void lanczos_resampler_remove_sample(void *);
#ifdef __cplusplus
};
#endif
#endif

View File

@ -385,7 +385,7 @@ void state_loadstate(struct NDS_state *state, const u8 *ss, u32 ss_size)
state->execute = TRUE;
}
void state_render(struct NDS_state *state, s16 * buffer, int sample_count)
void state_render(struct NDS_state *state, s16 * buffer, unsigned int sample_count)
{
s16 * ptr = buffer;

View File

@ -87,7 +87,7 @@ void state_setrom(NDS_state *state, u8 * rom, u32 rom_size);
void state_loadstate(NDS_state *state, const u8 * ss, u32 ss_size);
void state_render(NDS_state *state, s16 * buffer, int sample_count);
void state_render(NDS_state *state, s16 * buffer, unsigned int sample_count);
#ifdef __cplusplus
};

View File

@ -1011,7 +1011,7 @@ static int twosf_info(void * context, const char * name, const char * value)
return NO;
}
core->dwInterpolation = 1;
core->dwInterpolation = 2;
core->dwChannelMute = 0;
if (!state.arm7_clockdown_level)