cog/Frameworks/lazyusf2/lazyusf2/rsp_hle/hvqm.c

354 lines
10 KiB
C

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Mupen64plus-rsp-hle - hvqm.c *
* Mupen64Plus homepage: https://mupen64plus.org/ *
* Copyright (C) 2020 Gilles Siberlin *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include <assert.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "hle_external.h"
#include "hle_internal.h"
#include "memory.h"
/* Nest size */
#define HVQM2_NESTSIZE_L 70 /* Number of elements on long side */
#define HVQM2_NESTSIZE_S 38 /* Number of elements on short side */
#define HVQM2_NESTSIZE (HVQM2_NESTSIZE_L * HVQM2_NESTSIZE_S)
struct HVQM2Block {
uint8_t nbase;
uint8_t dc;
uint8_t dc_l;
uint8_t dc_r;
uint8_t dc_u;
uint8_t dc_d;
};
struct HVQM2Basis {
uint8_t sx;
uint8_t sy;
int16_t scale;
uint16_t offset;
uint16_t lineskip;
};
struct HVQM2Arg {
uint32_t info;
uint32_t buf;
uint16_t buf_width;
uint8_t chroma_step_h;
uint8_t chroma_step_v;
uint16_t hmcus;
uint16_t vmcus;
uint8_t alpha;
uint32_t nest;
};
struct RGBA {
uint8_t r;
uint8_t g;
uint8_t b;
uint8_t a;
};
static struct HVQM2Arg arg;
static const int16_t constant[5][16] = {
{0x0006,0x0008,0x0008,0x0006,0x0008,0x000A,0x000A,0x0008,0x0008,0x000A,0x000A,0x0008,0x0006,0x0008,0x0008,0x0006},
{0x0002,0x0000,0xFFFF,0xFFFF,0x0002,0x0000,0xFFFF,0xFFFF,0x0002,0x0000,0xFFFF,0xFFFF,0x0002,0x0000,0xFFFF,0xFFFF},
{0xFFFF,0xFFFF,0x0000,0x0002,0xFFFF,0xFFFF,0x0000,0x0002,0xFFFF,0xFFFF,0x0000,0x0002,0xFFFF,0xFFFF,0x0000,0x0002},
{0x0002,0x0002,0x0002,0x0002,0x0000,0x0000,0x0000,0x0000,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF},
{0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0x0000,0x0000,0x0000,0x0000,0x0002,0x0002,0x0002,0x0002}
};
static int process_info(struct hle_t* hle, uint8_t* base, int16_t* out)
{
struct HVQM2Block block;
uint8_t nbase = *base;
dram_load_u8(hle, (uint8_t*)&block, arg.info, sizeof(struct HVQM2Block));
arg.info += 8;
*base = block.nbase & 0x7;
if ((block.nbase & nbase) != 0)
return 0;
if (block.nbase == 0)
{
//LABEL8
for (int i = 0; i < 16; i++)
{
out[i] = constant[0][i] * block.dc;
out[i] += constant[1][i] * block.dc_l;
out[i] += constant[2][i] * block.dc_r;
out[i] += constant[3][i] * block.dc_u;
out[i] += constant[4][i] * block.dc_d;
out[i] += 4;
out[i] >>= 3;
}
}
else if ((block.nbase & 0xf) == 0)
{
//LABEL7
for (int i = 0; i < 16; i++)
{
out[i] = *dram_u8(hle, arg.info);
arg.info++;
}
}
else if (*base == 0)
{
//LABEL6
for (int i = 0; i < 16; i++)
{
out[i] = *(int8_t*)dram_u8(hle, arg.info) + block.dc;
arg.info++;
}
}
else
{
//LABEL5
struct HVQM2Basis basis;
for (int i = 0; i < 16; i++)
out[i] = block.dc;
for (; *base != 0; (*base)--)
{
basis.sx = *dram_u8(hle, arg.info);
arg.info++;
basis.sy = *dram_u8(hle, arg.info);
arg.info++;
basis.scale = *dram_u16(hle, arg.info);
arg.info += 2;
basis.offset = *dram_u16(hle, arg.info);
arg.info += 2;
basis.lineskip = *dram_u16(hle, arg.info);
arg.info += 2;
int16_t vec[16];
uint32_t addr = arg.nest + basis.offset;
int shift = (basis.sx != 0) ? 1 : 0;
//LABEL9
//LABEL10
for (int i = 0; i < 16; i += 4)
{
vec[i] = *dram_u8(hle, addr);
vec[i + 1] = *dram_u8(hle, addr + (1 << shift));
vec[i + 2] = *dram_u8(hle, addr + (2 << shift));
vec[i + 3] = *dram_u8(hle, addr + (3 << shift));
addr += basis.lineskip;
}
//LABEL11
int16_t sum = 0x8;
for (int i = 0; i < 16; i++)
sum += vec[i];
sum >>= 4;
int16_t max = 0;
for (int i = 0; i < 16; i++)
{
vec[i] -= sum;
max = (abs(vec[i]) > max) ? abs(vec[i]) : max;
}
double dmax = 0.0;
if (max > 0)
dmax = (double)(basis.scale << 2) / (double)max;
for (int i = 0; i < 16; i++)
out[i] += (vec[i] < 0) ? (int16_t)((double)vec[i] * dmax - 0.5) : (int16_t)((double)vec[i] * dmax + 0.5);
block.nbase &= 8;
}
assert(block.nbase == 0);
//if(block.nbase != 0)
// LABEL6
}
return 1;
}
#define SATURATE8(x) ((unsigned int) x <= 255 ? x : (x < 0 ? 0: 255))
static struct RGBA YCbCr_to_RGBA(int16_t Y, int16_t Cb, int16_t Cr, uint8_t alpha)
{
struct RGBA color;
//Format S10.6
int r = (int)(((double)Y + 0.5) + (1.765625 * (double)(Cr - 128)));
int g = (int)(((double)Y + 0.5) - (0.34375 * (double)(Cr - 128)) - (0.71875 * (double)(Cb - 128)));
int b = (int)(((double)Y + 0.5) + (1.40625 * (double)(Cb - 128)));
color.r = SATURATE8(r);
color.g = SATURATE8(g);
color.b = SATURATE8(b);
color.a = alpha;
return color;
}
void store_rgba5551(struct hle_t* hle, struct RGBA color, uint32_t * addr)
{
uint16_t pixel = ((color.b >> 3) << 11) | ((color.g >> 3) << 6) | ((color.r >> 3) << 1) | (color.a & 1);
dram_store_u16(hle, &pixel, *addr, 1);
*addr += 2;
}
void store_rgba8888(struct hle_t* hle, struct RGBA color, uint32_t * addr)
{
uint32_t pixel = (color.b << 24) | (color.g << 16) | (color.r << 8) | color.a;
dram_store_u32(hle, &pixel, *addr, 1);
*addr += 4;
}
typedef void(*store_pixel_t)(struct hle_t* hle, struct RGBA color, uint32_t * addr);
static void hvqm2_decode(struct hle_t* hle, int is32)
{
//uint32_t uc_data_ptr = *dmem_u32(hle, TASK_UCODE_DATA);
uint32_t data_ptr = *dmem_u32(hle, TASK_DATA_PTR);
assert((*dmem_u32(hle, TASK_FLAGS) & 0x1) == 0);
/* Fill HVQM2Arg struct */
arg.info = *dram_u32(hle, data_ptr);
data_ptr += 4;
arg.buf = *dram_u32(hle, data_ptr);
data_ptr += 4;
arg.buf_width = *dram_u16(hle, data_ptr);
data_ptr += 2;
arg.chroma_step_h = *dram_u8(hle, data_ptr);
data_ptr++;
arg.chroma_step_v = *dram_u8(hle, data_ptr);
data_ptr++;
arg.hmcus = *dram_u16(hle, data_ptr);
data_ptr += 2;
arg.vmcus = *dram_u16(hle, data_ptr);
data_ptr += 2;
arg.alpha = *dram_u8(hle, data_ptr);
arg.nest = data_ptr + 1;
assert(arg.chroma_step_h == 2);
assert((arg.chroma_step_v == 1) || (arg.chroma_step_v == 2));
assert((*hle->sp_status & 0x80) == 0); //SP_STATUS_YIELD
int length, skip;
store_pixel_t store_pixel;
if (is32)
{
length = 0x20;
skip = arg.buf_width << 2;
arg.buf_width <<= 4;
store_pixel = &store_rgba8888;
}
else
{
length = 0x10;
skip = arg.buf_width << 1;
arg.buf_width <<= 3;
store_pixel = &store_rgba5551;
}
if (arg.chroma_step_v == 2)
arg.buf_width += arg.buf_width;
for (int i = arg.vmcus; i != 0; i--)
{
uint32_t out;
int j;
for (j = arg.hmcus, out = arg.buf; j != 0; j--, out += length)
{
uint8_t base = 0x80;
int16_t Cb[16], Cr[16], Y1[32], Y2[32];
int16_t* pCb = Cb;
int16_t* pCr = Cr;
int16_t* pY1 = Y1;
int16_t* pY2 = Y2;
if (arg.chroma_step_v == 2)
{
if (process_info(hle, &base, pY1) == 0)
continue;
if (process_info(hle, &base, pY2) == 0)
continue;
pY1 = &Y1[16];
pY2 = &Y2[16];
}
if (process_info(hle, &base, pY1) == 0)
continue;
if (process_info(hle, &base, pY2) == 0)
continue;
if (process_info(hle, &base, Cr) == 0)
continue;
if (process_info(hle, &base, Cb) == 0)
continue;
pY1 = Y1;
pY2 = Y2;
uint32_t out_buf = out;
for (int k = 0; k < 4; k++)
{
for (int m = 0; m < arg.chroma_step_v; m++)
{
uint32_t addr = out_buf;
for (int l = 0; l < 4; l++)
{
struct RGBA color = YCbCr_to_RGBA(pY1[l], pCb[l >> 1], pCr[l >> 1], arg.alpha);
store_pixel(hle, color, &addr);
}
for (int l = 0; l < 4; l++)
{
struct RGBA color = YCbCr_to_RGBA(pY2[l], pCb[(l + 4) >> 1], pCr[(l + 4) >> 1], arg.alpha);
store_pixel(hle, color, &addr);
}
out_buf += skip;
pY1 += 4;
pY2 += 4;
}
pCr += 4;
pCb += 4;
}
}
arg.buf += arg.buf_width;
}
rsp_break(hle, SP_STATUS_TASKDONE);
}
void hvqm2_decode_sp1_task(struct hle_t* hle)
{
hvqm2_decode(hle, 0);
}
void hvqm2_decode_sp2_task(struct hle_t* hle)
{
hvqm2_decode(hle, 1);
}