Cog Audio: Change NEON s16/s32 to f32 code to use linear load/store instead of interleaved

CQTexperiment
Christopher Snowhill 2022-01-15 02:08:04 -08:00
parent 5ab728b205
commit c5b940680d
2 changed files with 4 additions and 4 deletions

View File

@@ -48,12 +48,12 @@ void convert_s16_to_float(float *out,
while (samples >= 8) while (samples >= 8)
{ {
float32x4x2_t oreg; float32x4x2_t oreg;
int16x4x2_t inreg = vld2_s16(in); int16x4x2_t inreg = vld1_s16_x2(in); // why were these interleaved before?
int32x4_t p1 = vmovl_s16(inreg.val[0]); int32x4_t p1 = vmovl_s16(inreg.val[0]);
int32x4_t p2 = vmovl_s16(inreg.val[1]); int32x4_t p2 = vmovl_s16(inreg.val[1]);
oreg.val[0] = vmulq_f32(vcvtq_f32_s32(p1), vgf); oreg.val[0] = vmulq_f32(vcvtq_f32_s32(p1), vgf);
oreg.val[1] = vmulq_f32(vcvtq_f32_s32(p2), vgf); oreg.val[1] = vmulq_f32(vcvtq_f32_s32(p2), vgf);
vst2q_f32(out, oreg); vst1q_f32_x2(out, oreg);
in += 8; in += 8;
out += 8; out += 8;
samples -= 8; samples -= 8;

View File

@@ -48,10 +48,10 @@ void convert_s32_to_float(float *out,
while (samples >= 8) while (samples >= 8)
{ {
float32x4x2_t oreg; float32x4x2_t oreg;
int32x4x2_t inreg = vld2q_s32(in); int32x4x2_t inreg = vld1q_s32_x2(in); // why were these interleaved before?
oreg.val[0] = vmulq_f32(vcvtq_f32_s32(inreg.val[0]), vgf); oreg.val[0] = vmulq_f32(vcvtq_f32_s32(inreg.val[0]), vgf);
oreg.val[1] = vmulq_f32(vcvtq_f32_s32(inreg.val[1]), vgf); oreg.val[1] = vmulq_f32(vcvtq_f32_s32(inreg.val[1]), vgf);
vst2q_f32(out, oreg); vst1q_f32_x2(out, oreg);
in += 8; in += 8;
out += 8; out += 8;
samples -= 8; samples -= 8;