/*
Copyright (c) 2006-2008 Advanced Micro Devices, Inc. All Rights Reserved.
This software is subject to the Apache v2.0 License.
*/

//************************************************************************* 
//This file includes the quantization functions from the JPEG chapter:
//	fwiQuantFwdRawTableInit_JPEG_8u
//  fwiQuantFwdTableInit_JPEG_8u16u
//	fwiQuantFwd8x8_JPEG_16s_C1I
//	fwiQuantInvTableInit_JPEG_8u16u
//	fwiQuantInv8x8_JPEG_16s_C1I
//************************************************************************* 

#include "fwdev.h"
#include "fwJPEG.h"
#include "FwSharedCode_SSE2.h"

using namespace OPT_LEVEL;

#if BUILD_NUM_AT_LEAST( 102 )

//-----------------------------------------------------------------------
//Scales a raw quantization table in place according to a quality factor.
//
//	pQuantRawTable - 64-entry table; every entry is read and overwritten.
//	qualityFactor  - requested quality; values <= 0 are treated as 1 and
//	                 values > 100 are treated as 100.
//
//The clamped quality is turned into a percentage scale (5000/quality when
//quality < 50, otherwise 200 - 2*quality). Each entry then becomes
//(entry*scale + 50)/100, raised to at least 1 and passed through
//FW_REF::Limits<U8>::Sat before being stored.
//
//Returns fwStsNullPtrErr when pQuantRawTable is null, fwStsNoErr otherwise.
//-----------------------------------------------------------------------
FwStatus PREFIX_OPT(OPT_PREFIX, fwiQuantFwdRawTableInit_JPEG_8u)(Fw8u *pQuantRawTable, int qualityFactor)
{
	if (pQuantRawTable == 0) return fwStsNullPtrErr;

	//Clamp the requested quality to 1..100.
	if (qualityFactor <= 0) qualityFactor = 1;
	else if (qualityFactor > 100) qualityFactor = 100;

	//Percentage applied to every table entry.
	int scale;
	if (qualityFactor < 50) scale = 5000/qualityFactor;
	else scale = 200 - qualityFactor*2;

	for (int i = 0; i < 64; i++) {
		int result = (pQuantRawTable[i]*scale + 50)/100;

		//A quantizer of 0 is not a usable divisor. The scaled value reaches 0
		//when the entry is 0, when scale is 0 (quality 100), or when a small
		//entry rounds down at high quality, so force a minimum of 1.
		if (result < 1) result = 1;

		pQuantRawTable[i] = FW_REF::Limits<U8>::Sat(result);
	}

	return fwStsNoErr;
}

//Index table declared here; its definition is not part of this file section.
//The table-init functions in this file fill output position i from raw-table
//entry zigZagInvOrder[i].
//The commented-out initializer below is kept for reference only; presumably
//it mirrors the real definition -- TODO confirm against the defining file.
extern const Fw8u zigZagInvOrder[64];// =
//{
//	 0,  1,  5,  6, 14, 15, 27, 28,
//	 2,  4,  7, 13, 16, 26, 29, 42,
//	 3,  8, 12, 17, 25, 30, 41, 43,
//	 9, 11, 18, 24, 31, 40, 44, 53,
//	10, 19, 23, 32, 39, 45, 52, 54,
//	20, 22, 33, 38, 46, 51, 55, 60,
//	21, 34, 37, 47, 50, 56, 59, 61,
//	35, 36, 48, 49, 57, 58, 62, 63
//};

//-----------------------------------------------------------------------
//Builds the forward (encoder) quantization table from a raw table.
//Output entry i is derived from raw entry zigZagInvOrder[i] and stored as
//the rounded quotient 32768/q, i.e. the reciprocal of the quantizer scaled
//by 15 bits. A raw value of 0 has no reciprocal and is stored as
//CBL_U16_MAX instead.
//Returns fwStsNullPtrErr if either pointer is null, fwStsNoErr otherwise.
//-----------------------------------------------------------------------
FwStatus PREFIX_OPT(OPT_PREFIX, fwiQuantFwdTableInit_JPEG_8u16u)(const Fw8u *pQuantRawTable, 
										   Fw16u *pQuantFwdTable)
{
	if (!pQuantRawTable || !pQuantFwdTable) return fwStsNullPtrErr;

	const Fw16u oneQ15 = 32768;	//1.0 scaled by 15 bits

	for (int idx = 0; idx < 64; ++idx) {
		const Fw8u q = pQuantRawTable[zigZagInvOrder[idx]];

		if (q == 0) {
			//No reciprocal exists for a zero quantizer.
			pQuantFwdTable[idx] = CBL_U16_MAX;
			continue;
		}

		//Adding q/2 before dividing rounds the quotient to nearest.
		pQuantFwdTable[idx] = (oneQ15 + (q >> 1)) / q;
	}

	return fwStsNoErr;
}

//-----------------------------------------------------------------------
//Quantizes an 8x8 block (64 values) of DCT coefficients in place.
//The formula from the JPEG standard is
//	sq[vu] = round (s[vu]/Q[vu])
//Here each coefficient is multiplied by the matching pQuantFwdTable entry,
//16384 (half of 2^15) is added for rounding, and the sum is shifted right
//by 15 bits.
//Note: pQuantFwdTable is expected to come from
//fwiQuantFwdTableInit_JPEG_8u16u, whose entries are scaled by 15 bits.
//Returns fwStsNullPtrErr if either pointer is null, fwStsNoErr otherwise.
//-----------------------------------------------------------------------
FwStatus PREFIX_OPT(OPT_PREFIX, fwiQuantFwd8x8_JPEG_16s_C1I)(Fw16s *pSrcDst, const Fw16u*
									   pQuantFwdTable) 
{
	if (!pSrcDst || !pQuantFwdTable) return fwStsNullPtrErr;

	Fw16s *coef = pSrcDst;
	const Fw16u *recip = pQuantFwdTable;

	//Walk both 64-entry arrays in step.
	for (Fw16s * const stop = pSrcDst + 64; coef != stop; ++coef, ++recip) {
		*coef = ((*coef) * (*recip) + 16384) >> 15;
	}

	return fwStsNoErr;
}

//-----------------------------------------------------------------------
//Builds the inverse (decoder) quantization table from a raw table.
//Output entry i is a copy of raw entry zigZagInvOrder[i], widened from
//Fw8u to Fw16u; no scaling is applied.
//Returns fwStsNullPtrErr if either pointer is null, fwStsNoErr otherwise.
//-----------------------------------------------------------------------
FwStatus PREFIX_OPT(OPT_PREFIX, fwiQuantInvTableInit_JPEG_8u16u)(const Fw8u *pQuantRawTable, Fw16u *pQuantInvTable)
{
	if (!pQuantRawTable || !pQuantInvTable) return fwStsNullPtrErr;

	Fw16u *out = pQuantInvTable;

	//Step through the index table and the output table together.
	for (const Fw8u *order = zigZagInvOrder; order != zigZagInvOrder + 64; ++order, ++out) {
		*out = pQuantRawTable[*order];
	}

	return fwStsNoErr;
}

//-----------------------------------------------------------------------
//Dequantizes an 8x8 block (64 values) of coefficients in place.
//The formula from the JPEG standard is
//	s[vu] = (sq[vu]*Q[vu])
//The product can exceed the 16-bit range, so the reference path stores it
//through FW_REF::Limits<S16>::Sat and the SSE2 path through
//FW_SSE2::pack32STo16S.
//This helper does no pointer validation; the public wrapper checks for
//null before calling it. Always returns fwStsNoErr.
//-----------------------------------------------------------------------
namespace OPT_LEVEL
{
SYS_INLINE FwStatus quantInv8x8_JPEG_16s_C1I(Fw16s *pSrcDst, const Fw16u*
									   pQuantInvTable) 
{
	switch(Dispatch::Type<DT_SSE2>())
	{
	case DT_SSE3:
	case DT_SSE2:
		{
			//64 16-bit values = eight 128-bit vectors of eight lanes each.
			__m128i *coefRows = reinterpret_cast<__m128i*>(pSrcDst);
			const __m128i *quantRows = reinterpret_cast<const __m128i*>(pQuantInvTable);

			for (int row = 0; row < 8; ++row)
			{
				//Unaligned loads: no alignment is required of either buffer.
				const __m128i coef = _mm_loadu_si128(coefRows + row);
				const __m128i quant = _mm_loadu_si128(quantRows + row);

				//Low and high 16 bits of each signed 16x16 product.
				//NOTE(review): _mm_mulhi_epi16 is a signed multiply, so table
				//entries are treated as signed 16-bit here; entries that
				//originate from an 8-bit raw table are unaffected.
				const __m128i prodLo = _mm_mullo_epi16(coef, quant);
				const __m128i prodHi = _mm_mulhi_epi16(coef, quant);

				//Interleave the halves into full 32-bit products:
				//lanes 0-3 in 'packed', lanes 4-7 in 'upper'.
				__m128i packed = _mm_unpacklo_epi16(prodLo, prodHi);
				__m128i upper = _mm_unpackhi_epi16(prodLo, prodHi);

				//Presumably narrows both vectors to eight saturated 16-bit
				//values; the result is read back from the first argument.
				FW_SSE2::pack32STo16S(packed, upper);

				_mm_storeu_si128(coefRows + row, packed);
			}
		}
		break;
	case DT_REFR:
	default:
		//Scalar reference path.
		for (int idx = 0; idx < 64; idx++) {
			pSrcDst[idx] = FW_REF::Limits<S16>::Sat(pSrcDst[idx]*pQuantInvTable[idx]);
		}
	}
	return fwStsNoErr;
}
};
//Public entry point for in-place dequantization of an 8x8 block.
//Rejects null pointers with fwStsNullPtrErr, then forwards to
//quantInv8x8_JPEG_16s_C1I and returns its status.
FwStatus PREFIX_OPT(OPT_PREFIX, fwiQuantInv8x8_JPEG_16s_C1I)(Fw16s *pSrcDst, const Fw16u*
									   pQuantInvTable) 
{
	if (!pSrcDst || !pQuantInvTable) return fwStsNullPtrErr;

	return quantInv8x8_JPEG_16s_C1I(pSrcDst, pQuantInvTable);
}

#endif //BUILD_NUM_AT_LEAST

// Please do NOT remove the above line for CPP files that need to be multipass compiled
// OREFR OSSE2 
