/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
  Based upon some commented-out C code from mpeg2dec (idct_mmx.c,
  written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>).

  Adapted to ProjectX-JNI by Pedro A. Aranda <paaguti@sourceforge.net>.
 */

#include "idct.h"

/*
 * Fixed-point cosine weights used by the row and column passes:
 *     Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5   (truncated to an integer)
 * i.e. the cosine terms scaled by sqrt(2) and by 2^14.
 */
#define W1  22725  // i = 1
#define W2  21407  // i = 2
#define W3  19266  // i = 3
#define W4  16383  // i = 4; NOTE(review): the formula above yields 16384, the code uses 16383 -- presumably deliberate, confirm before changing
#define W5  12873  // i = 5
#define W6  8867   // i = 6
#define W7  4520   // i = 7

/* Right-shift amounts applied to the results of the row pass and the column pass, respectively. */
#define ROW_SHIFT 11
#define COL_SHIFT 20 // 6  -- NOTE(review): the meaning of the original trailing "6" is not evident from this file

/*
 * 8x8 matrix used to do a trivial (slow) 8 point IDCT.
 *
 * Layout: entry [8*k + n] is the weight applied to input coefficient k when
 * computing output sample n. These are the same weights that idctRowCondDC
 * spells out term by term.
 *
 * The table is read-only data, hence const. Nothing in the visible part of
 * this file references it; the fast row/column routines carry the weights
 * inline.
 */
static const int coeff[64] = {
	W4, W4, W4, W4, W4, W4, W4, W4,
	W1, W3, W5, W7,-W7,-W5,-W3,-W1,
	W2, W6,-W6,-W2,-W2,-W6, W6, W2,
	W3,-W7,-W1,-W5, W5, W1, W7,-W3,
	W4,-W4,-W4, W4, W4,-W4,-W4, W4,
	W5,-W1, W7, W3,-W3,-W7, W1,-W5,
	W6,-W2, W2,-W6,-W6, W2,-W2, W6,
	W7,-W5, W3,-W1, W1,-W3, W5,-W7
};

static int idctRowCondDC (int *row, int o)
{
	int a0, a1, a2, a3, b0, b1, b2, b3;

	int     o0 = o, o1 = o+1, o2 = o+2, o3 = o+3,
		o4 = o+4, o5 = o+5, o6 = o+6, o7 = o+7;

	if (row[o1] == 0 && row[o2] == 0 && row[o3] == 0 && row[o4] == 0 && row[o5] == 0 && row[o6] == 0 && row[o7] == 0)
	{
		row[o0] = row[o1] = row[o2] = row[o3] = row[o4] = row[o5] = row[o6] = row[o7] = row[o0]<<3;
		return 0;
	}

	if (row[o4] == 0 && row[o5] == 0 && row[o6] == 0 && row[o7] == 0)
	{
		a0 = W4*row[o0] + W2*row[o2] + (1<<(ROW_SHIFT-1));
		a1 = W4*row[o0] + W6*row[o2] + (1<<(ROW_SHIFT-1));
		a2 = W4*row[o0] - W6*row[o2] + (1<<(ROW_SHIFT-1));
		a3 = W4*row[o0] - W2*row[o2] + (1<<(ROW_SHIFT-1));

		b0 = W1*row[o1] + W3*row[o3];
		b1 = W3*row[o1] - W7*row[o3];
		b2 = W5*row[o1] - W1*row[o3];
		b3 = W7*row[o1] - W5*row[o3];
	}else{
		a0 = W4*row[o0] + W2*row[o2] + W4*row[o4] + W6*row[o6] + (1<<(ROW_SHIFT-1));
		a1 = W4*row[o0] + W6*row[o2] - W4*row[o4] - W2*row[o6] + (1<<(ROW_SHIFT-1));
		a2 = W4*row[o0] - W6*row[o2] - W4*row[o4] + W2*row[o6] + (1<<(ROW_SHIFT-1));
		a3 = W4*row[o0] - W2*row[o2] + W4*row[o4] - W6*row[o6] + (1<<(ROW_SHIFT-1));

		b0 = W1*row[o1] + W3*row[o3] + W5*row[o5] + W7*row[o7];
		b1 = W3*row[o1] - W7*row[o3] - W1*row[o5] - W5*row[o7];
		b2 = W5*row[o1] - W1*row[o3] + W7*row[o5] + W3*row[o7];
		b3 = W7*row[o1] - W5*row[o3] + W3*row[o5] - W1*row[o7];
	}

	row[o0] = (a0 + b0) >> ROW_SHIFT;
	row[o7] = (a0 - b0) >> ROW_SHIFT;
	row[o1] = (a1 + b1) >> ROW_SHIFT;
	row[o6] = (a1 - b1) >> ROW_SHIFT;
	row[o2] = (a2 + b2) >> ROW_SHIFT;
	row[o5] = (a2 - b2) >> ROW_SHIFT;
	row[o3] = (a3 + b3) >> ROW_SHIFT;
	row[o4] = (a3 - b3) >> ROW_SHIFT;
	
	return 1;
}

static void idctSparseCol (int *col, int o)
{
	int a0, a1, a2, a3, b0, b1, b2, b3;

	int o0 = o;
	int o1 = 8*1+o;
	int o2 = 8*2+o;
	int o3 = 8*3+o;
	int o4 = 8*4+o;
	int o5 = 8*5+o;
	int o6 = 8*6+o;
	int o7 = 8*7+o;

	col[o0] += (1<<(COL_SHIFT-1))/W4;
	a0 = W4*col[o0];
	a1 = W4*col[o0];
	a2 = W4*col[o0];
	a3 = W4*col[o0];

	if(col[o2] != 0){
		a0 +=  + W2*col[o2];
		a1 +=  + W6*col[o2];
		a2 +=  - W6*col[o2];
		a3 +=  - W2*col[o2];
	}

	if(col[o4] != 0){
		a0 += + W4*col[o4];
		a1 += - W4*col[o4];
		a2 += - W4*col[o4];
		a3 += + W4*col[o4];
	}

	if(col[o6] != 0){
		a0 += + W6*col[o6];
		a1 += - W2*col[o6];
		a2 += + W2*col[o6];
		a3 += - W6*col[o6];
	}

	if(col[o1] != 0){
		b0 = W1*col[o1];
		b1 = W3*col[o1];
		b2 = W5*col[o1];
		b3 = W7*col[o1];
	}else{
		b0 = 
		b1 = 
		b2 = 
		b3 = 0;
	}

	if(col[o3] != 0){
		b0 += + W3*col[o3];
		b1 += - W7*col[o3];
		b2 += - W1*col[o3];
		b3 += - W5*col[o3];
	}

	if(col[o5] != 0){
		b0 += + W5*col[o5];
		b1 += - W1*col[o5];
		b2 += + W7*col[o5];
		b3 += + W3*col[o5];
	}

	if(col[o7] != 0){
		b0 += + W7*col[o7];
		b1 += - W5*col[o7];
		b2 += + W3*col[o7];
		b3 += - W1*col[o7];
	}

	col[o0] = (a0 + b0) >> COL_SHIFT;
	col[o7] = (a0 - b0) >> COL_SHIFT;
	col[o1] = (a1 + b1) >> COL_SHIFT;
	col[o6] = (a1 - b1) >> COL_SHIFT;
	col[o2] = (a2 + b2) >> COL_SHIFT;
	col[o5] = (a2 - b2) >> COL_SHIFT;
	col[o3] = (a3 + b3) >> COL_SHIFT;
	col[o4] = (a3 - b3) >> COL_SHIFT;
}

/*
 * Two-pass inverse DCT over a 64-element block, in place: first every row
 * (starting at offsets 0, 8, ... 56), then every column (offsets 0 .. 7).
 */
static void IDCT_ffmpeg(int *tmpblk)
{
  int k;

  /* Row pass: row k begins at element 8*k. */
  for (k = 0; k < 8; k++)
    idctRowCondDC(tmpblk, 8 * k);

  /* Column pass: column k begins at element k. */
  for (k = 0; k < 8; k++)
    idctSparseCol(tmpblk, k);
}

/*
 * Inverse DCT of a 64-element block of shorts, in place.
 *
 * The samples are widened into an int work buffer, transformed with
 * IDCT_ffmpeg, and narrowed back into the caller's array.
 */
void IDCT_reference(short *block)
{
  int work[64];
  int *w;
  short *s;

  /* widen: short -> int */
  for (w = work, s = block; w != work + 64; w++, s++)
    *w = *s;

  IDCT_ffmpeg(work);

  /* narrow: int -> short */
  for (w = work, s = block; w != work + 64; w++, s++)
    *s = (short)*w;
}

/*
 * Inverse DCT of the 64 shorts at `in`, with the result written to the
 * 64 shorts at `out`.
 *
 * The input is copied into an int work buffer before anything is written
 * to `out`; `in` itself is only read.
 */
void IDCT_test(short *in,short *out)
{
  int work[64];
  int k = 0;

  /* widen: short -> int */
  while (k < 64) {
    work[k] = in[k];
    k++;
  }

  IDCT_ffmpeg(work);

  /* narrow: int -> short */
  for (k = 0; k < 64; k++)
    out[k] = (short)work[k];
}

/*
 * Initialisation hook for this IDCT implementation.
 *
 * Intentionally empty: all weights used by this file are compile-time
 * constants, so there is nothing to set up.
 * NOTE(review): presumably kept so callers can invoke an init routine
 * regardless of which IDCT variant is built -- confirm against idct.h.
 */
void IDCT_init(void)
{
}
