Cog Audio: Change NEON s16/s32 to f32 code to use linear load/store instead of interleaved
parent
5ab728b205
commit
c5b940680d
|
@@ -48,12 +48,12 @@ void convert_s16_to_float(float *out,
|
|||
while (samples >= 8)
|
||||
{
|
||||
float32x4x2_t oreg;
|
||||
- int16x4x2_t inreg = vld2_s16(in);
|
||||
+ int16x4x2_t inreg = vld1_s16_x2(in); // why were these interleaved before?
|
||||
int32x4_t p1 = vmovl_s16(inreg.val[0]);
|
||||
int32x4_t p2 = vmovl_s16(inreg.val[1]);
|
||||
oreg.val[0] = vmulq_f32(vcvtq_f32_s32(p1), vgf);
|
||||
oreg.val[1] = vmulq_f32(vcvtq_f32_s32(p2), vgf);
|
||||
- vst2q_f32(out, oreg);
|
||||
+ vst1q_f32_x2(out, oreg);
|
||||
in += 8;
|
||||
out += 8;
|
||||
samples -= 8;
|
||||
|
|
|
@@ -48,10 +48,10 @@ void convert_s32_to_float(float *out,
|
|||
while (samples >= 8)
|
||||
{
|
||||
float32x4x2_t oreg;
|
||||
- int32x4x2_t inreg = vld2q_s32(in);
|
||||
+ int32x4x2_t inreg = vld1q_s32_x2(in); // why were these interleaved before?
|
||||
oreg.val[0] = vmulq_f32(vcvtq_f32_s32(inreg.val[0]), vgf);
|
||||
oreg.val[1] = vmulq_f32(vcvtq_f32_s32(inreg.val[1]), vgf);
|
||||
- vst2q_f32(out, oreg);
|
||||
+ vst1q_f32_x2(out, oreg);
|
||||
in += 8;
|
||||
out += 8;
|
||||
samples -= 8;
|
||||
|
|
Loading…
Reference in New Issue