395 lines
13 KiB
C++
Executable File
395 lines
13 KiB
C++
Executable File
#include "All.h"
|
|
#ifdef BACKWARDS_COMPATIBILITY
|
|
|
|
#include "../MACLib.h"
|
|
#include "Anti-Predictor.h"
|
|
|
|
/*
 * Factory for the legacy anti-predictors.
 *
 * Picks the concrete anti-predictor class from the compression level and the
 * version number passed in, allocates it with new and hands the pointer back.
 *
 * nCompressionLevel - one of the COMPRESSION_LEVEL_* constants; only levels
 *                     whose ENABLE_COMPRESSION_MODE_* macro is defined are
 *                     recognised.
 * nVersion          - version number compared against the 3320 / 3600 / 3700 /
 *                     3800 thresholds to pick the matching implementation.
 *
 * Returns the newly allocated object, or NULL when the compression level is
 * not handled. The object is allocated with new; this function does not
 * release it.
 */
CAntiPredictor * CreateAntiPredictor(int nCompressionLevel, int nVersion)
{
    switch (nCompressionLevel)
    {
#ifdef ENABLE_COMPRESSION_MODE_FAST
    case COMPRESSION_LEVEL_FAST:
        if (nVersion >= 3320)
            return (CAntiPredictor *) new CAntiPredictorFast3320ToCurrent;
        return (CAntiPredictor *) new CAntiPredictorFast0000To3320;
#endif //ENABLE_COMPRESSION_MODE_FAST

#ifdef ENABLE_COMPRESSION_MODE_NORMAL
    case COMPRESSION_LEVEL_NORMAL:
        if (nVersion >= 3800)
            return (CAntiPredictor *) new CAntiPredictorNormal3800ToCurrent;
        if (nVersion >= 3320)
            return (CAntiPredictor *) new CAntiPredictorNormal3320To3800;
        return (CAntiPredictor *) new CAntiPredictorNormal0000To3320;
#endif //ENABLE_COMPRESSION_MODE_NORMAL

#ifdef ENABLE_COMPRESSION_MODE_HIGH
    case COMPRESSION_LEVEL_HIGH:
        if (nVersion >= 3800)
            return (CAntiPredictor *) new CAntiPredictorHigh3800ToCurrent;
        if (nVersion >= 3700)
            return (CAntiPredictor *) new CAntiPredictorHigh3700To3800;
        if (nVersion >= 3600)
            return (CAntiPredictor *) new CAntiPredictorHigh3600To3700;
        if (nVersion >= 3320)
            return (CAntiPredictor *) new CAntiPredictorHigh3320To3600;
        return (CAntiPredictor *) new CAntiPredictorHigh0000To3320;
#endif //ENABLE_COMPRESSION_MODE_HIGH

#ifdef ENABLE_COMPRESSION_MODE_EXTRA_HIGH
    case COMPRESSION_LEVEL_EXTRA_HIGH:
        if (nVersion >= 3800)
            return (CAntiPredictor *) new CAntiPredictorExtraHigh3800ToCurrent;
        if (nVersion >= 3700)
            return (CAntiPredictor *) new CAntiPredictorExtraHigh3700To3800;
        if (nVersion >= 3600)
            return (CAntiPredictor *) new CAntiPredictorExtraHigh3600To3700;
        if (nVersion >= 3320)
            return (CAntiPredictor *) new CAntiPredictorExtraHigh3320To3600;
        return (CAntiPredictor *) new CAntiPredictorExtraHigh0000To3320;
#endif //ENABLE_COMPRESSION_MODE_EXTRA_HIGH
    }

    // compression level not recognised (or its mode is compiled out)
    return NULL;
}
|
|
|
|
|
|
|
|
// Constructor: the body is intentionally empty, no work is performed here.
CAntiPredictor::CAntiPredictor() {}
|
|
|
|
// Destructor: the body is intentionally empty, nothing is released here.
CAntiPredictor::~CAntiPredictor() {}
|
|
|
|
// Base implementation of AntiPredict: does nothing. Neither array is read or
// written and NumberOfElements is ignored.
// NOTE(review): presumably the version-specific classes supply the real work;
// the class declarations are not visible in this file -- confirm.
void CAntiPredictor::AntiPredict(int *pInputArray, int *pOutputArray, int NumberOfElements)
{
    // intentionally empty
}
|
|
|
|
/*
 * Offset anti-predictor.
 *
 * The first Offset elements of pInputArray are copied to pOutputArray
 * unchanged. Every later element i (Offset <= i < NumberOfElements) becomes
 *
 *     pOutputArray[i] = pInputArray[i] + ((pOutputArray[i - Offset] * m) >> 12)
 *
 * where m starts at 0 and is adjusted after every element: it grows by DeltaM
 * when (pOutputArray[i - Offset] ^ pInputArray[i]) is greater than zero, and
 * shrinks by DeltaM otherwise (including when the XOR is exactly zero).
 *
 * pInputArray      - source values; at least max(Offset, NumberOfElements)
 *                    elements are read.
 * pOutputArray     - destination; receives Offset copied elements followed by
 *                    the reconstructed ones.
 * NumberOfElements - total number of elements to produce.
 * Offset           - distance (in elements) to the previously reconstructed
 *                    value used for the prediction.
 * DeltaM           - step applied to the adaptive multiplier m.
 */
void CAntiPredictorOffset::AntiPredict(int *pInputArray, int *pOutputArray, int NumberOfElements, int Offset, int DeltaM)
{
    // Pass the leading Offset elements straight through. The byte count uses
    // sizeof(int) rather than a literal 4 so it always matches the element
    // type, and the multiplication is carried out in size_t instead of int.
    memcpy(pOutputArray, pInputArray, Offset * sizeof(int));

    int *ip = &pInputArray[Offset];     // current input element
    int *ipo = &pOutputArray[0];        // output element Offset positions behind op
    int *op = &pOutputArray[Offset];    // current output element
    int * const pOutputEnd = &pOutputArray[NumberOfElements];
    int m = 0;                          // adaptive multiplier

    for (; op < pOutputEnd; ip++, ipo++, op++)
    {
        *op = *ip + ((*ipo * m) >> 12);

        // adapt m from the XOR of the history value and the current input
        if ((*ipo ^ *ip) > 0)
            m += DeltaM;
        else
            m -= DeltaM;
    }
}
|
|
|
|
#ifdef ENABLE_COMPRESSION_MODE_EXTRA_HIGH
|
|
|
|
int CAntiPredictorExtraHighHelper::ConventionalDotProduct(short *bip, short *bbm, short *pIPAdaptFactor, int op, int nNumberOfIterations)
|
|
{
|
|
// dot product
|
|
int nDotProduct = 0;
|
|
short *pMaxBBM = &bbm[nNumberOfIterations];
|
|
|
|
if (op == 0)
|
|
{
|
|
while(bbm < pMaxBBM)
|
|
{
|
|
EXPAND_32_TIMES(nDotProduct += *bip++ * *bbm++;)
|
|
}
|
|
}
|
|
else if (op > 0)
|
|
{
|
|
while(bbm < pMaxBBM)
|
|
{
|
|
EXPAND_32_TIMES(nDotProduct += *bip++ * *bbm; *bbm++ += *pIPAdaptFactor++;)
|
|
}
|
|
}
|
|
else
|
|
{
|
|
while(bbm < pMaxBBM)
|
|
{
|
|
EXPAND_32_TIMES(nDotProduct += *bip++ * *bbm; *bbm++ -= *pIPAdaptFactor++;)
|
|
}
|
|
}
|
|
|
|
// use the dot product
|
|
return nDotProduct;
|
|
}
|
|
|
|
#ifdef ENABLE_ASSEMBLY
|
|
|
|
// Shared first half of one dot-product chunk (MSVC inline assembly, MMX).
//   - loads three quadwords from [esi] into mm0..mm2 and advances esi by 24
//   - loads three quadwords from [edi] into mm3..mm5; edi is stepped forward
//     by 16 and then back by 16, so it ends where it started
//   - multiplies the pairs with pmaddwd and adds the three results into mm7
// On exit mm3..mm5 still hold the values read from [edi] so the macro that
// follows can adjust them and store them back.
#define MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_HEAD          \
    __asm movq      mm0, [esi]                                      \
    __asm add       esi, 8                                          \
    __asm movq      mm1, [esi]                                      \
    __asm add       esi, 8                                          \
    __asm movq      mm2, [esi]                                      \
    __asm add       esi, 8                                          \
                                                                    \
    __asm movq      mm3, [edi]                                      \
    __asm add       edi, 8                                          \
    __asm movq      mm4, [edi]                                      \
    __asm add       edi, 8                                          \
    __asm movq      mm5, [edi]                                      \
    __asm sub       edi, 16                                         \
                                                                    \
    __asm pmaddwd   mm0, mm3                                        \
    __asm pmaddwd   mm1, mm4                                        \
    __asm pmaddwd   mm2, mm5                                        \
                                                                    \
    __asm paddd     mm7, mm0                                        \
    __asm paddd     mm7, mm1                                        \
    __asm paddd     mm7, mm2
|
|
|
|
|
|
// One full chunk, "add" flavour: runs the HEAD macro, then adds the three
// quadwords at [eax] to mm3..mm5 with paddw and stores each result back to
// [edi]. eax and edi each advance by 24 bytes over the chunk.
#define MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD           \
    MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_HEAD              \
                                                                    \
    __asm paddw     mm3, DWORD PTR [eax]                            \
    __asm movq      [edi], mm3                                      \
    __asm add       eax, 8                                          \
    __asm add       edi, 8                                          \
    __asm paddw     mm4, DWORD PTR [eax]                            \
    __asm movq      [edi], mm4                                      \
    __asm add       eax, 8                                          \
    __asm add       edi, 8                                          \
    __asm paddw     mm5, DWORD PTR [eax]                            \
    __asm movq      [edi], mm5                                      \
    __asm add       eax, 8                                          \
    __asm add       edi, 8
|
|
|
|
// One full chunk, "subtract" flavour: runs the HEAD macro, then subtracts the
// three quadwords at [eax] from mm3..mm5 with psubw and stores each result
// back to [edi]. eax and edi each advance by 24 bytes over the chunk.
#define MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT      \
    MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_HEAD              \
                                                                    \
    __asm psubw     mm3, DWORD PTR [eax]                            \
    __asm movq      [edi], mm3                                      \
    __asm add       eax, 8                                          \
    __asm add       edi, 8                                          \
    __asm psubw     mm4, DWORD PTR [eax]                            \
    __asm movq      [edi], mm4                                      \
    __asm add       eax, 8                                          \
    __asm add       edi, 8                                          \
    __asm psubw     mm5, DWORD PTR [eax]                            \
    __asm movq      [edi], mm5                                      \
    __asm add       eax, 8                                          \
    __asm add       edi, 8
|
|
|
|
int CAntiPredictorExtraHighHelper::MMXDotProduct(short *bip, short *bbm, short *pIPAdaptFactor, int op, int nNumberOfIterations)
|
|
{
|
|
int nDotProduct;
|
|
nNumberOfIterations = (nNumberOfIterations / 128);
|
|
|
|
if (op > 0)
|
|
{
|
|
__asm
|
|
{
|
|
push eax
|
|
|
|
mov eax, DWORD PTR [pIPAdaptFactor]
|
|
|
|
push esi
|
|
push edi
|
|
|
|
mov esi, DWORD PTR bip[0]
|
|
mov edi, DWORD PTR bbm[0]
|
|
|
|
pxor mm7, mm7
|
|
|
|
LBL_ADD_AGAIN:
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// process 8 mm registers full
|
|
/////////////////////////////////////////////////////////
|
|
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
|
|
|
|
// fill the registers
|
|
movq mm0, [esi]
|
|
add esi, 8
|
|
movq mm1, [esi]
|
|
add esi, 8
|
|
|
|
movq mm3, [edi]
|
|
add edi, 8
|
|
movq mm4, [edi]
|
|
sub edi, 8
|
|
|
|
pmaddwd mm0, mm3
|
|
pmaddwd mm1, mm4
|
|
|
|
paddd mm7, mm0
|
|
paddd mm7, mm1
|
|
|
|
paddw mm3, DWORD PTR [eax]
|
|
movq [edi], mm3
|
|
add eax, 8
|
|
add edi, 8
|
|
paddw mm4, DWORD PTR [eax]
|
|
movq [edi], mm4
|
|
add eax, 8
|
|
add edi, 8
|
|
|
|
sub nNumberOfIterations, 1
|
|
cmp nNumberOfIterations, 0
|
|
jg LBL_ADD_AGAIN
|
|
|
|
///////////////////////////////////////////////////////////////
|
|
// clean-up
|
|
///////////////////////////////////////////////////////////////
|
|
// mm7 has the final dot-product (split into two dwords)
|
|
movq mm6, mm7
|
|
psrlq mm7, 32
|
|
paddd mm6, mm7
|
|
movd nDotProduct, mm6
|
|
|
|
pop edi
|
|
pop esi
|
|
pop eax
|
|
emms
|
|
|
|
}
|
|
}
|
|
else
|
|
{
|
|
__asm
|
|
{
|
|
push eax
|
|
|
|
mov eax, DWORD PTR [pIPAdaptFactor]
|
|
|
|
push esi
|
|
push edi
|
|
|
|
mov esi, DWORD PTR bip[0]
|
|
mov edi, DWORD PTR bbm[0]
|
|
|
|
pxor mm7, mm7
|
|
|
|
LBL_SUBTRACT_AGAIN:
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// process 8 mm registers full
|
|
/////////////////////////////////////////////////////////
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
|
|
MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
|
|
|
|
// fill the registers
|
|
movq mm0, [esi]
|
|
add esi, 8
|
|
movq mm1, [esi]
|
|
add esi, 8
|
|
|
|
movq mm3, [edi]
|
|
add edi, 8
|
|
movq mm4, [edi]
|
|
sub edi, 8
|
|
|
|
pmaddwd mm0, mm3
|
|
pmaddwd mm1, mm4
|
|
|
|
paddd mm7, mm0
|
|
paddd mm7, mm1
|
|
|
|
psubw mm3, DWORD PTR [eax]
|
|
movq [edi], mm3
|
|
add eax, 8
|
|
add edi, 8
|
|
psubw mm4, DWORD PTR [eax]
|
|
movq [edi], mm4
|
|
add eax, 8
|
|
add edi, 8
|
|
|
|
sub nNumberOfIterations, 1
|
|
cmp nNumberOfIterations, 0
|
|
jg LBL_SUBTRACT_AGAIN
|
|
|
|
///////////////////////////////////////////////////////////////
|
|
// clean-up
|
|
///////////////////////////////////////////////////////////////
|
|
// mm7 has the final dot-product (split into two dwords)
|
|
movq mm6, mm7
|
|
psrlq mm7, 32
|
|
paddd mm6, mm7
|
|
movd nDotProduct, mm6
|
|
|
|
pop edi
|
|
pop esi
|
|
pop eax
|
|
emms
|
|
|
|
}
|
|
}
|
|
|
|
return nDotProduct;
|
|
}
|
|
|
|
#endif // #ifdef ENABLE_ASSEMBLY
|
|
|
|
#endif // #ifdef ENABLE_COMPRESSION_MODE_EXTRA_HIGH
|
|
|
|
#endif // #ifdef BACKWARDS_COMPATIBILITY
|