331 lines
12 KiB
C++
Executable File
331 lines
12 KiB
C++
Executable File
#include "All.h"
|
|
#ifdef BACKWARDS_COMPATIBILITY
|
|
|
|
#include "Anti-Predictor.h"
|
|
|
|
#ifdef ENABLE_COMPRESSION_MODE_EXTRA_HIGH
|
|
|
|
/*****************************************************************************************
|
|
Extra high 0000 to 3320 implementation
|
|
*****************************************************************************************/
|
|
void CAntiPredictorExtraHigh0000To3320::AntiPredict(int *pInputArray, int *pOutputArray, int NumberOfElements, int Iterations, unsigned int *pOffsetValueArrayA, unsigned int *pOffsetValueArrayB) {
|
|
for (int z = Iterations; z >= 0; z--){
|
|
AntiPredictorOffset(pInputArray, pOutputArray, NumberOfElements, pOffsetValueArrayB[z], -1, 64);
|
|
AntiPredictorOffset(pOutputArray, pInputArray, NumberOfElements, pOffsetValueArrayA[z], 1, 64);
|
|
}
|
|
|
|
CAntiPredictorHigh0000To3320 AntiPredictor;
|
|
AntiPredictor.AntiPredict(pInputArray, pOutputArray, NumberOfElements);
|
|
}
|
|
|
|
// Performs one single-tap offset pass from Input_Array into Output_Array.
//
// The first Max_Order samples are copied unchanged.  Every later sample is
// the input sample plus (dm > 0) or minus (dm <= 0) the already produced
// output sample g positions earlier, shifted right by 3.
//
// Parameters:
//   Input_Array / Output_Array - source and destination, Number_of_Elements ints each
//   g                          - distance (in samples) back to the reference sample
//   dm                         - direction: > 0 adds the term, otherwise subtracts it
//   Max_Order                  - number of leading samples copied verbatim
//
// The data is copied through unmodified when g is 0 or when there are no
// more than Max_Order samples.  An offset outside [1, Max_Order] is handled
// the same way: g > Max_Order would read Output_Array before its first
// element and g < 0 would read ahead of (and eventually past) the samples
// being produced.
// NOTE(review): assumes valid streams never carry an offset above Max_Order - confirm against the encoder.
void CAntiPredictorExtraHigh0000To3320::AntiPredictorOffset(int* Input_Array, int* Output_Array, int Number_of_Elements, int g, int dm, int Max_Order)
{
    // nothing to do (and a negative count must never reach memcpy)
    if (Number_of_Elements <= 0)
        return;

    if ((g <= 0) || (g > Max_Order) || (Number_of_Elements <= Max_Order))
    {
        memcpy(Output_Array, Input_Array, Number_of_Elements * sizeof(int));
        return;
    }

    // the leading samples have no usable history and pass through unchanged
    memcpy(Output_Array, Input_Array, Max_Order * sizeof(int));

    if (dm > 0)
    {
        for (int q = Max_Order; q < Number_of_Elements; q++)
            Output_Array[q] = Input_Array[q] + (Output_Array[q - g] >> 3);
    }
    else
    {
        for (int q = Max_Order; q < Number_of_Elements; q++)
            Output_Array[q] = Input_Array[q] - (Output_Array[q - g] >> 3);
    }
}
|
|
|
|
|
|
/*****************************************************************************************
|
|
Extra high 3320 to 3600 implementation
|
|
*****************************************************************************************/
|
|
void CAntiPredictorExtraHigh3320To3600::AntiPredict(int *pInputArray, int *pOutputArray, int NumberOfElements, int Iterations, unsigned int *pOffsetValueArrayA, unsigned int *pOffsetValueArrayB)
|
|
{
|
|
for (int z = Iterations; z >= 0; z--)
|
|
{
|
|
AntiPredictorOffset(pInputArray, pOutputArray, NumberOfElements, pOffsetValueArrayB[z], -1, 32);
|
|
AntiPredictorOffset(pOutputArray, pInputArray, NumberOfElements, pOffsetValueArrayA[z], 1, 32);
|
|
}
|
|
|
|
CAntiPredictorHigh0000To3320 AntiPredictor;
|
|
AntiPredictor.AntiPredict(pInputArray, pOutputArray, NumberOfElements);
|
|
}
|
|
|
|
|
|
// Performs one adaptive single-tap offset pass from Input_Array into
// Output_Array.
//
// The first Max_Order samples are copied unchanged.  Every later sample is
// the input sample plus (dm > 0) or minus (dm <= 0) the term
// (Output_Array[q - g] * m) >> 12, where the weight m starts at 512 and is
// moved by 8 after each sample depending on whether
// Input_Array[q] ^ Output_Array[q - g] is positive.  The direction of that
// adjustment is mirrored between the two dm branches.
//
// Parameters:
//   Input_Array / Output_Array - source and destination, Number_of_Elements ints each
//   g                          - distance (in samples) back to the reference sample
//   dm                         - direction: > 0 adds the term, otherwise subtracts it
//   Max_Order                  - number of leading samples copied verbatim
//
// The data is copied through unmodified when g is 0 or when there are no
// more than Max_Order samples.  An offset outside [1, Max_Order] is handled
// the same way: g > Max_Order would read Output_Array before its first
// element and g < 0 would read ahead of (and eventually past) the samples
// being produced.
// NOTE(review): assumes valid streams never carry an offset above Max_Order - confirm against the encoder.
void CAntiPredictorExtraHigh3320To3600::AntiPredictorOffset(int* Input_Array, int* Output_Array, int Number_of_Elements, int g, int dm, int Max_Order)
{
    // nothing to do (and a negative count must never reach memcpy)
    if (Number_of_Elements <= 0)
        return;

    if ((g <= 0) || (g > Max_Order) || (Number_of_Elements <= Max_Order))
    {
        memcpy(Output_Array, Input_Array, Number_of_Elements * sizeof(int));
        return;
    }

    // the leading samples have no usable history and pass through unchanged
    memcpy(Output_Array, Input_Array, Max_Order * sizeof(int));

    int m = 512; // adaptive weight applied to the reference sample

    if (dm > 0)
    {
        for (int q = Max_Order; q < Number_of_Elements; q++)
        {
            Output_Array[q] = Input_Array[q] + ((Output_Array[q - g] * m) >> 12);

            if ((Input_Array[q] ^ Output_Array[q - g]) > 0)
                m += 8;
            else
                m -= 8;
        }
    }
    else
    {
        for (int q = Max_Order; q < Number_of_Elements; q++)
        {
            Output_Array[q] = Input_Array[q] - ((Output_Array[q - g] * m) >> 12);

            if ((Input_Array[q] ^ Output_Array[q - g]) > 0)
                m -= 8;
            else
                m += 8;
        }
    }
}
|
|
|
|
|
|
/*****************************************************************************************
|
|
Extra high 3600 to 3700 implementation
|
|
*****************************************************************************************/
|
|
void CAntiPredictorExtraHigh3600To3700::AntiPredict(int *pInputArray, int *pOutputArray, int NumberOfElements, int Iterations, unsigned int *pOffsetValueArrayA, unsigned int *pOffsetValueArrayB) {
|
|
for (int z = Iterations; z >= 0; ){
|
|
|
|
AntiPredictorOffset(pInputArray, pOutputArray, NumberOfElements, pOffsetValueArrayA[z], pOffsetValueArrayB[z], 64);
|
|
z--;
|
|
|
|
if (z >= 0) {
|
|
AntiPredictorOffset(pOutputArray, pInputArray, NumberOfElements, pOffsetValueArrayA[z], pOffsetValueArrayB[z], 64);
|
|
z--;
|
|
}
|
|
else {
|
|
memcpy(pInputArray, pOutputArray, NumberOfElements * 4);
|
|
goto Exit_Loop;
|
|
z--;
|
|
}
|
|
}
|
|
|
|
Exit_Loop:
|
|
CAntiPredictorHigh3600To3700 AntiPredictor;
|
|
AntiPredictor.AntiPredict(pInputArray, pOutputArray, NumberOfElements);
|
|
}
|
|
|
|
// Performs one adaptive two-tap offset pass from Input_Array into
// Output_Array.
//
// The first Max_Order samples are copied unchanged.  Every later sample is
//   Input_Array[q] + ((Output_Array[q - g1] * m) >> 9)
//                  - ((Output_Array[q - g2] * m2) >> 9)
// Both weights start at 64.  After each sample m is incremented when
// Input_Array[q] ^ Output_Array[q - g1] is positive and decremented
// otherwise; m2 moves the opposite way based on the g2 reference.
//
// Parameters:
//   Input_Array / Output_Array - source and destination, Number_of_Elements ints each
//   g1, g2                     - distances (in samples) back to the two reference samples
//   Max_Order                  - number of leading samples copied verbatim
//
// The data is copied through unmodified when either offset is 0 or when
// there are no more than Max_Order samples.  Offsets outside [1, Max_Order]
// are handled the same way: a larger offset would read Output_Array before
// its first element and a negative one would read ahead of (and eventually
// past) the samples being produced.
// NOTE(review): assumes valid streams never carry an offset above Max_Order - confirm against the encoder.
void CAntiPredictorExtraHigh3600To3700::AntiPredictorOffset(int* Input_Array, int* Output_Array, int Number_of_Elements, int g1, int g2, int Max_Order)
{
    // nothing to do (and a negative count must never reach memcpy)
    if (Number_of_Elements <= 0)
        return;

    const bool bOffsetsUsable = (g1 > 0) && (g1 <= Max_Order) && (g2 > 0) && (g2 <= Max_Order);
    if (!bOffsetsUsable || (Number_of_Elements <= Max_Order))
    {
        memcpy(Output_Array, Input_Array, Number_of_Elements * sizeof(int));
        return;
    }

    // the leading samples have no usable history and pass through unchanged
    memcpy(Output_Array, Input_Array, Max_Order * sizeof(int));

    int m = 64;  // adaptive weight for the g1 reference
    int m2 = 64; // adaptive weight for the g2 reference

    for (int q = Max_Order; q < Number_of_Elements; q++)
    {
        Output_Array[q] = Input_Array[q] + ((Output_Array[q - g1] * m) >> 9) - ((Output_Array[q - g2] * m2) >> 9);

        if ((Input_Array[q] ^ Output_Array[q - g1]) > 0)
            m++;
        else
            m--;

        if ((Input_Array[q] ^ Output_Array[q - g2]) > 0)
            m2--;
        else
            m2++;
    }
}
|
|
|
|
/*****************************************************************************************
|
|
Extra high 3700 to 3800 implementation
|
|
*****************************************************************************************/
|
|
void CAntiPredictorExtraHigh3700To3800::AntiPredict(int *pInputArray, int *pOutputArray, int NumberOfElements, int Iterations, unsigned int *pOffsetValueArrayA, unsigned int *pOffsetValueArrayB) {
|
|
for (int z = Iterations; z >= 0; ) {
|
|
|
|
AntiPredictorOffset(pInputArray, pOutputArray, NumberOfElements, pOffsetValueArrayA[z], pOffsetValueArrayB[z], 64);
|
|
z--;
|
|
|
|
if (z >= 0) {
|
|
AntiPredictorOffset(pOutputArray, pInputArray, NumberOfElements, pOffsetValueArrayA[z], pOffsetValueArrayB[z], 64);
|
|
z--;
|
|
}
|
|
else {
|
|
memcpy(pInputArray, pOutputArray, NumberOfElements * 4);
|
|
goto Exit_Loop;
|
|
z--;
|
|
}
|
|
}
|
|
|
|
Exit_Loop:
|
|
CAntiPredictorHigh3700To3800 AntiPredictor;
|
|
AntiPredictor.AntiPredict(pInputArray, pOutputArray, NumberOfElements);
|
|
|
|
}
|
|
|
|
// Performs one adaptive two-tap offset pass from Input_Array into
// Output_Array.
//
// The first Max_Order samples are copied unchanged.  Every later sample is
//   Input_Array[q] + ((Output_Array[q - g1] * m) >> 9)
//                  - ((Output_Array[q - g2] * m2) >> 9)
// Both weights start at 64.  After each sample m is incremented when
// Input_Array[q] ^ Output_Array[q - g1] is positive and decremented
// otherwise; m2 moves the opposite way based on the g2 reference.
//
// Parameters:
//   Input_Array / Output_Array - source and destination, Number_of_Elements ints each
//   g1, g2                     - distances (in samples) back to the two reference samples
//   Max_Order                  - number of leading samples copied verbatim
//
// The data is copied through unmodified when either offset is 0 or when
// there are no more than Max_Order samples.  Offsets outside [1, Max_Order]
// are handled the same way: a larger offset would read Output_Array before
// its first element and a negative one would read ahead of (and eventually
// past) the samples being produced.
// NOTE(review): assumes valid streams never carry an offset above Max_Order - confirm against the encoder.
void CAntiPredictorExtraHigh3700To3800::AntiPredictorOffset(int* Input_Array, int* Output_Array, int Number_of_Elements, int g1, int g2, int Max_Order)
{
    // nothing to do (and a negative count must never reach memcpy)
    if (Number_of_Elements <= 0)
        return;

    const bool bOffsetsUsable = (g1 > 0) && (g1 <= Max_Order) && (g2 > 0) && (g2 <= Max_Order);
    if (!bOffsetsUsable || (Number_of_Elements <= Max_Order))
    {
        memcpy(Output_Array, Input_Array, Number_of_Elements * sizeof(int));
        return;
    }

    // the leading samples have no usable history and pass through unchanged
    memcpy(Output_Array, Input_Array, Max_Order * sizeof(int));

    int m = 64;  // adaptive weight for the g1 reference
    int m2 = 64; // adaptive weight for the g2 reference

    for (int q = Max_Order; q < Number_of_Elements; q++)
    {
        Output_Array[q] = Input_Array[q] + ((Output_Array[q - g1] * m) >> 9) - ((Output_Array[q - g2] * m2) >> 9);

        if ((Input_Array[q] ^ Output_Array[q - g1]) > 0)
            m++;
        else
            m--;

        if ((Input_Array[q] ^ Output_Array[q - g2]) > 0)
            m2--;
        else
            m2++;
    }
}
|
|
|
|
/*****************************************************************************************
|
|
Extra high 3800 to Current
|
|
*****************************************************************************************/
|
|
void CAntiPredictorExtraHigh3800ToCurrent::AntiPredict(int *pInputArray, int *pOutputArray, int NumberOfElements, BOOL bMMXAvailable, int CPULoadBalancingFactor, int nVersion)
|
|
{
|
|
const int nFilterStageElements = (nVersion < 3830) ? 128 : 256;
|
|
const int nFilterStageShift = (nVersion < 3830) ? 11 : 12;
|
|
const int nMaxElements = (nVersion < 3830) ? 134 : 262;
|
|
const int nFirstElement = (nVersion < 3830) ? 128 : 256;
|
|
const int nStageCShift = (nVersion < 3830) ? 10 : 11;
|
|
|
|
//short frame handling
|
|
if (NumberOfElements < nMaxElements) {
|
|
memcpy(pOutputArray, pInputArray, NumberOfElements * 4);
|
|
return;
|
|
}
|
|
|
|
//make the first five samples identical in both arrays
|
|
memcpy(pOutputArray, pInputArray, nFirstElement * 4);
|
|
|
|
//variable declares and initializations
|
|
//short bm[nFirstElement]; memset(bm, 0, nFirstElement * 2);
|
|
short bm[256]; memset(bm, 0, 256 * 2);
|
|
int m2 = 64, m3 = 115, m4 = 64, m5 = 740, m6 = 0;
|
|
int p4 = pInputArray[nFirstElement - 1];
|
|
int p3 = (pInputArray[nFirstElement - 1] - pInputArray[nFirstElement - 2]) << 1;
|
|
int p2 = pInputArray[nFirstElement - 1] + ((pInputArray[nFirstElement - 3] - pInputArray[nFirstElement - 2]) << 3);// - pInputArray[3] + pInputArray[2];
|
|
int *op = &pOutputArray[nFirstElement];
|
|
int *ip = &pInputArray[nFirstElement];
|
|
int IPP2 = ip[-2];
|
|
int IPP1 = ip[-1];
|
|
int p7 = 2 * ip[-1] - ip[-2];
|
|
int opp = op[-1];
|
|
int Original;
|
|
CAntiPredictorExtraHighHelper Helper;
|
|
|
|
//undo the initial prediction stuff
|
|
int q; // loop variable
|
|
for (q = 1; q < nFirstElement; q++) {
|
|
pOutputArray[q] += pOutputArray[q - 1];
|
|
}
|
|
|
|
//pump the primary loop
|
|
short *IPAdaptFactor = (short *) calloc(NumberOfElements, 2);
|
|
short *IPShort = (short *) calloc(NumberOfElements, 2);
|
|
for (q = 0; q < nFirstElement; q++) {
|
|
IPAdaptFactor[q] = ((pInputArray[q] >> 30) & 2) - 1;
|
|
IPShort[q] = short(pInputArray[q]);
|
|
}
|
|
|
|
int FM[9]; memset(&FM[0], 0, 9 * 4);
|
|
int FP[9]; memset(&FP[0], 0, 9 * 4);
|
|
|
|
for (q = nFirstElement; op < &pOutputArray[NumberOfElements]; op++, ip++, q++) {
|
|
//CPU load-balancing
|
|
if (CPULoadBalancingFactor > 0) {
|
|
if ((q % CPULoadBalancingFactor) == 0) { SLEEP(1); }
|
|
}
|
|
|
|
if (nVersion >= 3830)
|
|
{
|
|
int *pFP = &FP[8];
|
|
int *pFM = &FM[8];
|
|
int nDotProduct = 0;
|
|
FP[0] = ip[0];
|
|
|
|
if (FP[0] == 0)
|
|
{
|
|
EXPAND_8_TIMES(nDotProduct += *pFP * *pFM--; *pFP-- = *(pFP - 1);)
|
|
}
|
|
else if (FP[0] > 0)
|
|
{
|
|
EXPAND_8_TIMES(nDotProduct += *pFP * *pFM; *pFM-- += ((*pFP >> 30) & 2) - 1; *pFP-- = *(pFP - 1);)
|
|
}
|
|
else
|
|
{
|
|
EXPAND_8_TIMES(nDotProduct += *pFP * *pFM; *pFM-- -= ((*pFP >> 30) & 2) - 1; *pFP-- = *(pFP - 1);)
|
|
}
|
|
|
|
*ip -= nDotProduct >> 9;
|
|
}
|
|
|
|
Original = *ip;
|
|
|
|
IPShort[q] = short(*ip);
|
|
IPAdaptFactor[q] = ((ip[0] >> 30) & 2) - 1;
|
|
|
|
#ifdef ENABLE_ASSEMBLY
|
|
if (bMMXAvailable && (Original != 0))
|
|
{
|
|
*ip -= (Helper.MMXDotProduct(&IPShort[q-nFirstElement], &bm[0], &IPAdaptFactor[q-nFirstElement], Original, nFilterStageElements) >> nFilterStageShift);
|
|
}
|
|
else
|
|
{
|
|
*ip -= (Helper.ConventionalDotProduct(&IPShort[q-nFirstElement], &bm[0], &IPAdaptFactor[q-nFirstElement], Original, nFilterStageElements) >> nFilterStageShift);
|
|
}
|
|
#else
|
|
*ip -= (Helper.ConventionalDotProduct(&IPShort[q-nFirstElement], &bm[0], &IPAdaptFactor[q-nFirstElement], Original, nFilterStageElements) >> nFilterStageShift);
|
|
#endif
|
|
|
|
IPShort[q] = short(*ip);
|
|
IPAdaptFactor[q] = ((ip[0] >> 30) & 2) - 1;
|
|
|
|
/////////////////////////////////////////////
|
|
*op = *ip + (((p2 * m2) + (p3 * m3) + (p4 * m4)) >> 11);
|
|
|
|
if (*ip > 0) {
|
|
m2 -= ((p2 >> 30) & 2) - 1;
|
|
m3 -= ((p3 >> 28) & 8) - 4;
|
|
m4 -= ((p4 >> 28) & 8) - 4;
|
|
}
|
|
else if (*ip < 0) {
|
|
m2 += ((p2 >> 30) & 2) - 1;
|
|
m3 += ((p3 >> 28) & 8) - 4;
|
|
m4 += ((p4 >> 28) & 8) - 4;
|
|
}
|
|
|
|
|
|
p2 = *op + ((IPP2 - p4) << 3);
|
|
p3 = (*op - p4) << 1;
|
|
IPP2 = p4;
|
|
p4 = *op;
|
|
|
|
/////////////////////////////////////////////
|
|
*op += (((p7 * m5) - (opp * m6)) >> nStageCShift);
|
|
|
|
if (p4 > 0) {
|
|
m5 -= ((p7 >> 29) & 4) - 2;
|
|
m6 += ((opp >> 30) & 2) - 1;
|
|
}
|
|
else if (p4 < 0) {
|
|
m5 += ((p7 >> 29) & 4) - 2;
|
|
m6 -= ((opp >> 30) & 2) - 1;
|
|
}
|
|
|
|
p7 = 2 * *op - opp;
|
|
opp = *op;
|
|
|
|
/////////////////////////////////////////////
|
|
*op += ((op[-1] * 31) >> 5);
|
|
|
|
}
|
|
|
|
free(IPAdaptFactor);
|
|
free(IPShort);
|
|
}
|
|
|
|
#endif // #ifdef ENABLE_COMPRESSION_MODE_EXTRA_HIGH
|
|
|
|
#endif // #ifdef BACKWARDS_COMPATIBILITY
|