Cog Audio: Change NEON s16/s32 to f32 code to use linear load/store instead of interleaved

2022-01-15 02:08:04 -08:00 · 2022-01-15 02:08:04 -08:00 · c5b940680d
parent 5ab728b205
commit c5b940680d
2 changed files with 4 additions and 4 deletions
--- a/ThirdParty/RetroArch/libretro-common/audio/conversion/s16_to_float.c
+++ b/ThirdParty/RetroArch/libretro-common/audio/conversion/s16_to_float.c
@@ -48,12 +48,12 @@ void convert_s16_to_float(float *out,
      while (samples >= 8)
      {
         float32x4x2_t oreg;
-         int16x4x2_t inreg   = vld2_s16(in);
+         int16x4x2_t inreg   = vld1_s16_x2(in); // why were these interleaved before?
         int32x4_t      p1   = vmovl_s16(inreg.val[0]);
         int32x4_t      p2   = vmovl_s16(inreg.val[1]);
         oreg.val[0]         = vmulq_f32(vcvtq_f32_s32(p1), vgf);
         oreg.val[1]         = vmulq_f32(vcvtq_f32_s32(p2), vgf);
-         vst2q_f32(out, oreg);
+         vst1q_f32_x2(out, oreg);
         in                 += 8;
         out                += 8;
         samples            -= 8;
--- a/ThirdParty/RetroArch/libretro-common/audio/conversion/s32_to_float.c
+++ b/ThirdParty/RetroArch/libretro-common/audio/conversion/s32_to_float.c
@@ -48,10 +48,10 @@ void convert_s32_to_float(float *out,
      while (samples >= 8)
      {
         float32x4x2_t oreg;
-         int32x4x2_t inreg   = vld2q_s32(in);
+         int32x4x2_t inreg   = vld1q_s32_x2(in); // why were these interleaved before?
         oreg.val[0]         = vmulq_f32(vcvtq_f32_s32(inreg.val[0]), vgf);
         oreg.val[1]         = vmulq_f32(vcvtq_f32_s32(inreg.val[1]), vgf);
-         vst2q_f32(out, oreg);
+         vst1q_f32_x2(out, oreg);
         in                 += 8;
         out                += 8;
         samples            -= 8;