#include "All.h"
#ifdef BACKWARDS_COMPATIBILITY

#include "../MACLib.h"
#include "Anti-Predictor.h"

/*****************************************************************************************
Factory: picks the anti-predictor implementation for a compression level / file version.

    nCompressionLevel   one of the COMPRESSION_LEVEL_* constants; only the levels whose
                        ENABLE_COMPRESSION_MODE_* macro is defined are compiled in
    nVersion            version number used to choose between the per-version classes

Returns a newly allocated object, or NULL when the level matches no compiled-in case.
NOTE(review): the object is created with 'new' and nothing here releases it, so the caller
presumably has to delete it -- confirm against the call sites.
NOTE(review): the explicit casts are kept exactly as in the original; presumably every
concrete class derives publicly from CAntiPredictor -- confirm in Anti-Predictor.h.
*****************************************************************************************/
CAntiPredictor * CreateAntiPredictor(int nCompressionLevel, int nVersion)
{
    CAntiPredictor *pAntiPredictor = NULL;

    switch (nCompressionLevel)
    {
#ifdef ENABLE_COMPRESSION_MODE_FAST
        case COMPRESSION_LEVEL_FAST:
            if (nVersion < 3320)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorFast0000To3320;
            }
            else
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorFast3320ToCurrent;
            }
            break;
#endif //ENABLE_COMPRESSION_MODE_FAST

#ifdef ENABLE_COMPRESSION_MODE_NORMAL
        case COMPRESSION_LEVEL_NORMAL:
            if (nVersion < 3320)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorNormal0000To3320;
            }
            else if (nVersion < 3800)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorNormal3320To3800;
            }
            else
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorNormal3800ToCurrent;
            }
            break;
#endif //ENABLE_COMPRESSION_MODE_NORMAL

#ifdef ENABLE_COMPRESSION_MODE_HIGH
        case COMPRESSION_LEVEL_HIGH:
            if (nVersion < 3320)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorHigh0000To3320;
            }
            else if (nVersion < 3600)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorHigh3320To3600;
            }
            else if (nVersion < 3700)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorHigh3600To3700;
            }
            else if (nVersion < 3800)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorHigh3700To3800;
            }
            else
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorHigh3800ToCurrent;
            }
            break;
#endif //ENABLE_COMPRESSION_MODE_HIGH

#ifdef ENABLE_COMPRESSION_MODE_EXTRA_HIGH
        case COMPRESSION_LEVEL_EXTRA_HIGH:
            if (nVersion < 3320)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorExtraHigh0000To3320;
            }
            else if (nVersion < 3600)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorExtraHigh3320To3600;
            }
            else if (nVersion < 3700)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorExtraHigh3600To3700;
            }
            else if (nVersion < 3800)
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorExtraHigh3700To3800;
            }
            else
            {
                pAntiPredictor = (CAntiPredictor *) new CAntiPredictorExtraHigh3800ToCurrent;
            }
            break;
#endif //ENABLE_COMPRESSION_MODE_EXTRA_HIGH

        default:
            // unknown level, or a level whose support was compiled out: stay NULL
            break;
    }

    return pAntiPredictor;
}

/*****************************************************************************************
CAntiPredictor -- construction / destruction do nothing at this level
*****************************************************************************************/
CAntiPredictor::CAntiPredictor()
{
}

CAntiPredictor::~CAntiPredictor()
{
}

/*****************************************************************************************
Base implementation: intentionally a no-op (neither array is read or written).
*****************************************************************************************/
void CAntiPredictor::AntiPredict(int *pInputArray, int *pOutputArray, int NumberOfElements)
{
}

/*****************************************************************************************
Offset anti-predictor.

The first Offset elements are copied from the input to the output unchanged.  Every later
output element is the matching input element plus a correction taken from the output
element Offset positions earlier, scaled by the running weight m and shifted right by 12.

After each element m moves by DeltaM: up when (earlier output XOR current input) is
greater than zero (the two values differ but share a sign bit), down otherwise.

    pInputArray         source values (read only)
    pOutputArray        destination; also read back Offset elements behind the write position
    NumberOfElements    total number of elements to produce in pOutputArray
    Offset              distance between an output element and the earlier one it uses
    DeltaM              step applied to the weight after every element
*****************************************************************************************/
void CAntiPredictorOffset::AntiPredict(int *pInputArray, int *pOutputArray, int NumberOfElements, int Offset, int DeltaM)
{
    // sizeof(int) instead of a literal 4 so the byte count always matches the element type
    memcpy(pOutputArray, pInputArray, Offset * sizeof(int));

    int *ip = &pInputArray[Offset];     // current input element
    int *ipo = &pOutputArray[0];        // output element Offset positions behind 'op'
    int *op = &pOutputArray[Offset];    // current output element
    int m = 0;                          // adaptive weight

    for (; op < &pOutputArray[NumberOfElements]; ip++, ipo++, op++)
    {
        *op = *ip + ((*ipo * m) >> 12);

        if ((*ipo ^ *ip) > 0)
        {
            m += DeltaM;
        }
        else
        {
            m -= DeltaM;
        }
    }
}

#ifdef ENABLE_COMPRESSION_MODE_EXTRA_HIGH

/*****************************************************************************************
Plain C++ dot product of bip and bbm with optional in-place adaptation of bbm.

    bip                 first operand (read only)
    bbm                 second operand; updated in place unless op == 0
    pIPAdaptFactor      per-element adjustment applied to bbm (unused when op == 0)
    op                  == 0: bbm is left untouched
                        >  0: each bbm element has its adapt factor added after use
                        <  0: each bbm element has its adapt factor subtracted after use
    nNumberOfIterations number of elements of bbm to process

Returns the sum of products, computed with the bbm values as they were before adaptation.

NOTE(review): every pass of the loops goes through EXPAND_32_TIMES (defined elsewhere),
so the count presumably has to be a multiple of 32 to stay inside the buffers -- confirm.
*****************************************************************************************/
int CAntiPredictorExtraHighHelper::ConventionalDotProduct(short *bip, short *bbm, short *pIPAdaptFactor, int op, int nNumberOfIterations)
{
    // dot product
    int nDotProduct = 0;
    short *pMaxBBM = &bbm[nNumberOfIterations];

    if (op == 0)
    {
        while(bbm < pMaxBBM)
        {
            EXPAND_32_TIMES(nDotProduct += *bip++ * *bbm++;)
        }
    }
    else if (op > 0)
    {
        while(bbm < pMaxBBM)
        {
            EXPAND_32_TIMES(nDotProduct += *bip++ * *bbm; *bbm++ += *pIPAdaptFactor++;)
        }
    }
    else
    {
        while(bbm < pMaxBBM)
        {
            EXPAND_32_TIMES(nDotProduct += *bip++ * *bbm; *bbm++ -= *pIPAdaptFactor++;)
        }
    }

    // use the dot product
    return nDotProduct;
}

#ifdef ENABLE_ASSEMBLY

/*****************************************************************************************
MMX building blocks used by MMXDotProduct.

Register roles, as set up by MMXDotProduct before the macros are expanded:
    esi -> bip              edi -> bbm              eax -> pIPAdaptFactor
    mm7 =  running sum (two 32-bit partial sums)

..._CHUNK_HEAD loads three quadwords from [esi] (esi advances by 24 bytes) and three
quadwords from [edi] (edi is moved back to the start of the chunk afterwards), multiplies
them pairwise with pmaddwd and adds the three results into mm7.  mm3..mm5 still hold the
loaded [edi] quadwords when the macro ends.

..._CHUNK_ADD / ..._CHUNK_SUBTRACT run the head and then add (paddw) or subtract (psubw)
three quadwords from [eax] to mm3..mm5, storing each result back through edi; eax and edi
both end up 24 bytes further on.

NOTE(review): the memory operands of paddw / psubw are spelled 'DWORD PTR' although the
destination is a 64-bit MMX register; presumably the assembler ignores the size override.
Left exactly as in the original -- confirm before changing.

(Only block comments may be used inside these macros: a '//' comment would be extended by
the line continuations and swallow the instructions that follow it.)
*****************************************************************************************/
#define MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_HEAD \
    __asm movq mm0, [esi] \
    __asm add esi, 8 \
    __asm movq mm1, [esi] \
    __asm add esi, 8 \
    __asm movq mm2, [esi] \
    __asm add esi, 8 \
    \
    __asm movq mm3, [edi] \
    __asm add edi, 8 \
    __asm movq mm4, [edi] \
    __asm add edi, 8 \
    __asm movq mm5, [edi] \
    __asm sub edi, 16 \
    \
    __asm pmaddwd mm0, mm3 \
    __asm pmaddwd mm1, mm4 \
    __asm pmaddwd mm2, mm5 \
    \
    __asm paddd mm7, mm0 \
    __asm paddd mm7, mm1 \
    __asm paddd mm7, mm2

#define MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD \
    MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_HEAD \
    \
    __asm paddw mm3, DWORD PTR [eax] \
    __asm movq [edi], mm3 \
    __asm add eax, 8 \
    __asm add edi, 8 \
    __asm paddw mm4, DWORD PTR [eax] \
    __asm movq [edi], mm4 \
    __asm add eax, 8 \
    __asm add edi, 8 \
    __asm paddw mm5, DWORD PTR [eax] \
    __asm movq [edi], mm5 \
    __asm add eax, 8 \
    __asm add edi, 8

#define MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT \
    MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_HEAD \
    \
    __asm psubw mm3, DWORD PTR [eax] \
    __asm movq [edi], mm3 \
    __asm add eax, 8 \
    __asm add edi, 8 \
    __asm psubw mm4, DWORD PTR [eax] \
    __asm movq [edi], mm4 \
    __asm add eax, 8 \
    __asm add edi, 8 \
    __asm psubw mm5, DWORD PTR [eax] \
    __asm movq [edi], mm5 \
    __asm add eax, 8 \
    __asm add edi, 8

/*****************************************************************************************
MMX dot product of bip and bbm with in-place adaptation of bbm.

    bip                 first operand (read only)
    bbm                 second operand; updated in place
    pIPAdaptFactor      per-element adjustment applied to bbm
    op                  > 0: adapt factors are added to bbm, otherwise they are subtracted
    nNumberOfIterations element count; divided by 128 to get the number of loop passes

Each loop pass consumes 128 bytes * 2 = 256 bytes ... of each array in quadword steps:
ten macro chunks of three quadwords plus a two-quadword tail, i.e. 32 quadwords.  The loop
condition is tested at the bottom, so one full pass runs even if the divided count is 0.

Returns the sum of the two 32-bit halves accumulated in mm7.

NOTE(review): unlike ConventionalDotProduct there is no separate op == 0 path here; a zero
op takes the subtract branch and therefore still modifies bbm.  Left unchanged because
altering it could change decoder output -- confirm which behaviour is intended.
*****************************************************************************************/
int CAntiPredictorExtraHighHelper::MMXDotProduct(short *bip, short *bbm, short *pIPAdaptFactor, int op, int nNumberOfIterations)
{
    // written by the 'movd' at the end of either assembly block; initialised so the
    // variable never holds an indeterminate value from the compiler's point of view
    int nDotProduct = 0;
    nNumberOfIterations = (nNumberOfIterations / 128);

    if (op > 0)
    {
        __asm
        {
            push eax

            mov eax, DWORD PTR [pIPAdaptFactor]

            push esi
            push edi

            mov esi, DWORD PTR bip[0]
            mov edi, DWORD PTR bbm[0]

            pxor mm7, mm7

LBL_ADD_AGAIN:

            /////////////////////////////////////////////////////////
            // ten chunks of three quadwords each
            /////////////////////////////////////////////////////////
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_ADD

            // tail: the remaining two quadwords of this pass
            // fill the registers
            movq mm0, [esi]
            add esi, 8
            movq mm1, [esi]
            add esi, 8

            movq mm3, [edi]
            add edi, 8
            movq mm4, [edi]
            sub edi, 8

            pmaddwd mm0, mm3
            pmaddwd mm1, mm4

            paddd mm7, mm0
            paddd mm7, mm1

            paddw mm3, DWORD PTR [eax]
            movq [edi], mm3
            add eax, 8
            add edi, 8
            paddw mm4, DWORD PTR [eax]
            movq [edi], mm4
            add eax, 8
            add edi, 8

            sub nNumberOfIterations, 1
            cmp nNumberOfIterations, 0
            jg LBL_ADD_AGAIN

            ///////////////////////////////////////////////////////////////
            // clean-up
            ///////////////////////////////////////////////////////////////

            // mm7 has the final dot-product (split into two dwords)
            movq mm6, mm7
            psrlq mm7, 32
            paddd mm6, mm7
            movd nDotProduct, mm6

            pop edi
            pop esi
            pop eax

            emms
        }
    }
    else
    {
        __asm
        {
            push eax

            mov eax, DWORD PTR [pIPAdaptFactor]

            push esi
            push edi

            mov esi, DWORD PTR bip[0]
            mov edi, DWORD PTR bbm[0]

            pxor mm7, mm7

LBL_SUBTRACT_AGAIN:

            /////////////////////////////////////////////////////////
            // ten chunks of three quadwords each
            /////////////////////////////////////////////////////////
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT
            MMX_DOT_PRODUCT_3800_TO_CURRENT_PROCESS_CHUNK_SUBTRACT

            // tail: the remaining two quadwords of this pass
            // fill the registers
            movq mm0, [esi]
            add esi, 8
            movq mm1, [esi]
            add esi, 8

            movq mm3, [edi]
            add edi, 8
            movq mm4, [edi]
            sub edi, 8

            pmaddwd mm0, mm3
            pmaddwd mm1, mm4

            paddd mm7, mm0
            paddd mm7, mm1

            psubw mm3, DWORD PTR [eax]
            movq [edi], mm3
            add eax, 8
            add edi, 8
            psubw mm4, DWORD PTR [eax]
            movq [edi], mm4
            add eax, 8
            add edi, 8

            sub nNumberOfIterations, 1
            cmp nNumberOfIterations, 0
            jg LBL_SUBTRACT_AGAIN

            ///////////////////////////////////////////////////////////////
            // clean-up
            ///////////////////////////////////////////////////////////////

            // mm7 has the final dot-product (split into two dwords)
            movq mm6, mm7
            psrlq mm7, 32
            paddd mm6, mm7
            movd nDotProduct, mm6

            pop edi
            pop esi
            pop eax

            emms
        }
    }

    return nDotProduct;
}

#endif // #ifdef ENABLE_ASSEMBLY

#endif // #ifdef ENABLE_COMPRESSION_MODE_EXTRA_HIGH

#endif // #ifdef BACKWARDS_COMPATIBILITY