//
// ccealign -- structural alignment plugin module for PyMol
//

//////////////////////////////////////////////////////////////////////////////
//
//  Copyright (c) 2007, Jason Vertrees.
//  All rights reserved.
//
//  Redistribution and use in source and binary forms, with or without
//  modification, are permitted provided that the following conditions are
//  met:
//
//      * Redistributions of source code must retain the above copyright
//      notice, this list of conditions and the following disclaimer.
//
//      * Redistributions in binary form must reproduce the above copyright
//      notice, this list of conditions and the following disclaimer in
//      the documentation and/or other materials provided with the
//      distribution.
//
//  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
//  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
//  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
//  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
//  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
//  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
//  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
//  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
//  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
//  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
//  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//////////////////////////////////////////////////////////////////////////////

#include "ccealignmodule.h"

/////////////////////////////////////////////////////////////////////////////
//
/////////////////////////////////////////////////////////////////////////////
// Release a row-pointer matrix: every row in [0, rows) and then the row
// table itself.  NULL matrices and non-positive row counts are accepted.
static void
ccealign_freeMatrix(double** m, int rows)
{
	int r;

	if ( m == NULL )
		return;

	for ( r = 0; r < rows; r++ )
		free(m[r]);
	free(m);
}

//
// Python entry point.  Expects ONE argument: a 2-tuple (listA, listB) where
// each list holds [x, y, z] coordinate lists (format string "(OO)").
//
// Returns whatever findPath() builds (a list of candidate paths), or NULL
// with a Python exception set when the input is unusable or memory runs out.
//
// All C buffers allocated here are released before returning, including the
// individual rows of the distance and similarity matrices.
//
static PyObject*
ccealign_ccealign(PyObject* self, PyObject* args)
{
	// CE constants
	const double windowSize = 8.0;
	const int window = (int) windowSize;

	// listA/listB are borrowed from `args`, which the interpreter keeps alive
	// for the duration of this call, so no extra reference counting is needed.
	PyObject *listA, *listB;
	PyObject* paths = NULL;

	pcePoint coordsA = NULL, coordsB = NULL;
	double **dmA = NULL, **dmB = NULL, **S = NULL;
	int lenA = 0, lenB = 0;
	int rowsS = 0, colsS = 0;

	if ( ! PyArg_ParseTuple(args, "(OO)", &listA, &listB) ) {
		printf("Could not unparse objects\n");
		return NULL;
	}

	if ( ! PyList_Check(listA) || ! PyList_Check(listB) ) {
		PyErr_SetString(PyExc_TypeError,
			"ccealign expects a pair of lists of [x, y, z] coordinates");
		return NULL;
	}

	// handle empty selections (should probably do this in Python)
	lenA = (int) PyList_Size(listA);
	if ( lenA < 1 ) {
		printf("CEALIGN ERROR: First selection didn't have any atoms.  Please check your selection.\n");
		PyErr_SetString(PyExc_ValueError, "first selection is empty");
		return NULL;
	}

	// NOTE: the original re-tested lenA here, so an empty second selection
	// slipped through.
	lenB = (int) PyList_Size(listB);
	if ( lenB < 1 ) {
		printf("CEALIGN ERROR: Second selection didn't have any atoms.  Please check your selection.\n");
		PyErr_SetString(PyExc_ValueError, "second selection is empty");
		return NULL;
	}

	// The similarity matrix has (len - window) rows/columns; with selections
	// no longer than the window there is nothing to align and the matrix
	// dimensions would be zero or negative.
	if ( lenA <= window || lenB <= window ) {
		printf("CEALIGN ERROR: Each selection must contain more atoms than the CE window size.\n");
		PyErr_SetString(PyExc_ValueError,
			"selections are too short for the CE window size");
		return NULL;
	}

	// get the coordinates from the Python objects
	coordsA = getCoords( listA, lenA );
	if ( coordsA != NULL && ! PyErr_Occurred() )
		coordsB = getCoords( listB, lenB );
	if ( coordsA == NULL || coordsB == NULL || PyErr_Occurred() ) {
		// a pending exception (bad element) wins; otherwise assume OOM
		if ( ! PyErr_Occurred() )
			PyErr_NoMemory();
		goto cleanup;
	}

	// calculate the distance matrix for each protein
	dmA = calcDM(coordsA, lenA);
	dmB = calcDM(coordsB, lenB);
	if ( dmA == NULL || dmB == NULL ) {
		PyErr_NoMemory();
		goto cleanup;
	}

	// calculate the CE Similarity matrix
	rowsS = lenA - window;
	colsS = lenB - window;
	S = calcS(dmA, dmB, rowsS, colsS, windowSize);
	if ( S == NULL ) {
		PyErr_NoMemory();
		goto cleanup;
	}

	// find the best path through the CE Sim. matrix; the returned reference
	// is handed straight back to Python (no extra Py_INCREF, which leaked).
	paths = findPath( S, dmA, dmB, lenA, lenB, window );
	if ( paths == NULL && ! PyErr_Occurred() )
		PyErr_SetString(PyExc_RuntimeError, "ccealign: path search failed");

cleanup:
	// release memory, rows included
	free(coordsA);
	free(coordsB);
	ccealign_freeMatrix(dmA, lenA);
	ccealign_freeMatrix(dmB, lenB);
	ccealign_freeMatrix(S, rowsS);

	return paths;
}


/////////////////////////////////////////////////////////////////////////////
//
// CE Specific
//
/////////////////////////////////////////////////////////////////////////////
//
// Build the len x len matrix of pairwise Euclidean distances between the
// points in `coords` (fields x, y, z).
//
// Returns a table of `len` row pointers, each row holding `len` doubles, or
// NULL when `coords` is NULL, `len` < 1, or an allocation fails.  The caller
// owns the result and must free every row and then the table.
//
double** calcDM(pcePoint coords, int len)
{
	int row, col;
	double** dm;

	if ( coords == NULL || len < 1 )
		return NULL;

	// The row table holds pointers, so size it by the pointer type
	// (the original used sizeof(double) here).
	dm = malloc( sizeof(*dm) * (size_t) len );
	if ( dm == NULL )
		return NULL;

	for ( row = 0; row < len; row++ ) {
		dm[row] = malloc( sizeof(**dm) * (size_t) len );
		if ( dm[row] == NULL ) {
			// undo the rows allocated so far
			while ( row-- > 0 )
				free(dm[row]);
			free(dm);
			return NULL;
		}
	}

	for ( row = 0; row < len; row++ ) {
		for ( col = 0; col < len; col++ ) {
			dm[row][col] = sqrt( pow( coords[row].x - coords[col].x ,2) +
						pow(coords[row].y - coords[col].y,2) +
						pow(coords[row].z - coords[col].z,2) );
		}
	}
	return dm;
}

/////////////////////////////////////////////////////////////////////////////
//
/////////////////////////////////////////////////////////////////////////////
//
// Build the CE similarity matrix.
//
//   d1, d2   distance matrices of protein A and B; rows/columns up to
//            (lenA - 1) + (winSize - 1) and (lenB - 1) + (winSize - 1)
//            respectively are read.
//   lenA     number of rows of the result (window start positions in A)
//   lenB     number of columns of the result (window start positions in B)
//   winSize  window length (truncated to int for indexing)
//
// S[iA][iB] is the mean absolute difference between the intra-window
// distances of the window starting at iA in A and the one starting at iB in
// B.  0 means a perfect match; larger values mean a worse match.
//
// Returns NULL when an input matrix is NULL, a dimension is < 1, or an
// allocation fails.  The caller owns the result and must free each of the
// lenA rows and then the row table.
//
double** calcS(double** d1, double** d2, int lenA, int lenB, double winSize)
{
	const int win = (int) winSize;
	// number of (row, col) pairs with col >= row + 2 inside one window
	const double sumSize = (winSize-1.0)*(winSize-2.0) / 2.0;
	int iA, iB, row, col;
	double** S;

	// Negative sizes would otherwise be converted to a huge size_t.
	if ( d1 == NULL || d2 == NULL || lenA < 1 || lenB < 1 )
		return NULL;

	// The row table holds pointers, so size it by the pointer type
	// (the original used sizeof(double) here).
	S = malloc( sizeof(*S) * (size_t) lenA );
	if ( S == NULL )
		return NULL;

	for ( iA = 0; iA < lenA; iA++ ) {
		S[iA] = malloc( sizeof(**S) * (size_t) lenB );
		if ( S[iA] == NULL ) {
			// undo the rows allocated so far
			while ( iA-- > 0 )
				free(S[iA]);
			free(S);
			return NULL;
		}
	}

	for ( iA = 0; iA < lenA; iA++ ) {
		for ( iB = 0; iB < lenB; iB++ ) {
			double score = 0.0;

			//
			// Distances between a residue and its immediate successor are
			// skipped (col starts at row + 2): neighbouring alpha carbons
			// are almost always 3.8 Angstroms apart, so those terms carry
			// no information.
			//
			for ( row = 0; row < win - 2; row++ ) {
				for ( col = row + 2; col < win; col++ ) {
					score += fabs( d1[iA+row][iA+col] - d2[iB+row][iB+col] );
				}
			}

			S[iA][iB] = score / sumSize;
		}
	}
	return S;
}



/////////////////////////////////////////////////////////////////////////////
//
/////////////////////////////////////////////////////////////////////////////
//
// Copy the first `length` entries of the Python list `L` into a freshly
// malloc'ed array of cePoint.  Each entry must itself be a list whose items
// 0, 1 and 2 are read as x, y and z via PyFloat_AsDouble.
//
// Returns NULL with a Python exception set when the buffer cannot be
// allocated or when an entry/component cannot be fetched from its list.
// If a component is fetched but cannot be converted to a float, the value
// reported by PyFloat_AsDouble is stored and the exception is left pending;
// callers should check PyErr_Occurred() after this returns.
//
// The caller owns the returned buffer and must free() it.
//
pcePoint getCoords( PyObject* L, int length )
{
	int i;
	pcePoint coords;

	// Request at least one element so that an empty input still yields a
	// valid (freeable) pointer rather than an implementation-defined
	// malloc(0) result.
	coords = (pcePoint) malloc( sizeof(cePoint) * (size_t)( length > 0 ? length : 1 ) );
	if ( coords == NULL ) {
		PyErr_NoMemory();
		return NULL;
	}

	// loop through the arguments, pulling out the XYZ coordinates.
	for ( i = 0; i < length; i++ ) {
		double xyz[3];
		int c;

		// PyList_GetItem hands back a borrowed reference, or NULL with an
		// exception set (not a list / index out of range).
		PyObject* curCoord = PyList_GetItem(L, i);
		if ( curCoord == NULL ) {
			free(coords);
			return NULL;
		}
		// hold the entry while its components are converted
		Py_INCREF(curCoord);

		for ( c = 0; c < 3; c++ ) {
			PyObject* curVal = PyList_GetItem(curCoord, c);
			if ( curVal == NULL ) {
				Py_DECREF(curCoord);
				free(coords);
				return NULL;
			}
			Py_INCREF(curVal);
			xyz[c] = PyFloat_AsDouble(curVal);
			Py_DECREF(curVal);
		}

		Py_DECREF(curCoord);

		coords[i].x = xyz[0];
		coords[i].y = xyz[1];
		coords[i].z = xyz[2];
	}

	return coords;
}



/////////////////////////////////////////////////////////////////////////////
//
/////////////////////////////////////////////////////////////////////////////
PyObject* findPath( double** S, double** dA, double** dB, int lenA, int lenB, int winSize )
{
	// CE-specific cutoffs
	const double D0 = 3.0;
	const double D1 = 4.0;

	const int MAX_KEPT = 20;
	const int gapMax = 30;

	// the best Path's score
	double bestPathScore = 1e6;
	int bestPathLength = 0;

	int smaller = ( lenA < lenB ) ? lenA : lenB;
	smaller /= winSize;
	smaller += 1;

	int bufferBest = 0;
	int lenBuffer[MAX_KEPT];

	double scoreBuffer[MAX_KEPT];

	//
	// BEST PATH
	//
	// FIXME: free memory
	path bestPath = (path) malloc( sizeof(afp)*smaller );
	int i;
	for ( i = 0; i < smaller; i++ ) {
		bestPath[i].first = -1;
		bestPath[i].second = -1;
	}


	//======================================================================
	// for storing the best 20 paths
	// FIXME: free memory
	pathCache pathBuffer = (pathCache) malloc(sizeof(path)*MAX_KEPT);
	for ( i = 0; i < MAX_KEPT; i++ ) {
		// make the paths
		scoreBuffer[i] = 1e6;
		lenBuffer[i] = 0;
	}

	//======================================================================
	//
	//
	int iA, iB;
	for ( iA = 0; iA < lenA-winSize; iA++ ) {
		if ( iA > lenA - winSize*(bestPathLength-1) )
			break;
			
		for ( iB = 0; iB < lenB-winSize; iB++ ) {
			if ( S[iA][iB] >= D0 )
				continue;

			if ( iB > lenB - winSize*(bestPathLength-1) )
				break;

			//
			// Restart curPath here.
			//
			path curPath = malloc( sizeof(afp)*smaller );
			int i;
			for ( i = 0; i < smaller; i++ ) {
				curPath[i].first = -1;
				curPath[i].second = -1;
			}
			curPath[0].first = iA;
			curPath[0].second = iB;
			int curPathLength = 1;
			double curTotalScore = 0.0;

			//
			// Check all possible paths starting from iA, iB
			//
			int done = 0;
			while ( ! done ) {
				
				double gapBestScore = 1e6;
				int g;

				//
				// Check all possible gaps [1..gapMax] from here
				//
				for ( g = 0; g < (gapMax*2)+1; g++ ) {
					 int jA = curPath[curPathLength-1].first + winSize;
					 int jB = curPath[curPathLength-1].second + winSize;

					if ( (g+1) % 2 == 0 ) {
						jA += (g+1)/2;
					}
					else { // ( g odd )
						jB += (g+1)/2;
					}

					if ( jA > lenA-winSize || jB > lenB-winSize ) {
						continue;
					}
					
					double curScore = 0.0;
					int s;
					for ( s = 0; s < curPathLength; s++ ) {
						curScore += fabs( dA[curPath[s].first][jA] - dB[curPath[s].second][jB] );
						curScore += fabs( dA[curPath[s].first  + (winSize-1)][jA+(winSize-1)] - 
										  dB[curPath[s].second + (winSize-1)][jB+(winSize-1)] );
						int k;
						for ( k = 1; k < winSize-1; k++ )
							curScore += fabs( dA[curPath[s].first  + k][ jA + (winSize-1) - k ] - 
											  dB[curPath[s].second + k][ jB + (winSize-1) - k ] );
					}
					
					curScore /= (double) winSize * (double) curPathLength;

					if ( curScore >= D1 ) {
						continue;
					}
					
					if ( curScore < gapBestScore ) {
						curPath[curPathLength].first = jA;
						curPath[curPathLength].second = jB;
						gapBestScore = curScore;
					}
				}
				
				// calculate curTotalScore
				curTotalScore = 0.0;
				int p, wI, wJ;
				for ( p = 0; p < curPathLength; p++ ) {
					for ( wI = 0; wI < winSize; wI++ ) {
						for ( wJ = 0; wJ < winSize; wJ++ ) {
							curTotalScore += fabs( dA[ curPath[p].first+wI ][ curPath[p].first+wJ ] - 
												   dB[ curPath[p].second+wI][ curPath[p].second+wJ] );
						}
					}
				}
						
				curTotalScore /= (double) curPathLength * (double)(winSize*winSize);
						
				if ( curTotalScore >= D1 ) {
					done = 1;
					break;
				}

				if ( curPath[curPathLength].first == -1 ) {
					done = 1;
					break;
				}
				else {
					curPathLength++;
				}

			}


			if ( curPathLength > bestPathLength ||
				(curPathLength == bestPathLength && curTotalScore < bestPathScore )) {
				bestPathLength = curPathLength;
				bestPathScore = curTotalScore;
				// deep copy curPath
				// FIXME: free memory
				path tempPath = (path) malloc( sizeof(afp)*smaller );
				int i;
				for ( i = 0; i < smaller; i++ ) {
					tempPath[i].first = curPath[i].first;
					tempPath[i].second = curPath[i].second;
				}
				if ( bestPath )
					free(bestPath);
				bestPath = tempPath;
			}

			free(curPath);
			
			if ( curPathLength > lenBuffer[bufferBest] ||
				( curPathLength == lenBuffer[bufferBest] &&
					curTotalScore < scoreBuffer[bufferBest] )) {
				bufferBest = ( bufferBest == MAX_KEPT-1 ) ? 1 : bufferBest+1;
				// FIXME: free memory
				path pathCopy = (path) malloc( sizeof(afp)*smaller );
				int i;
				for ( i = 0; i < smaller; i++ ) {
					pathCopy[i].first = bestPath[i].first;
					pathCopy[i].second = bestPath[i].second;
				}
				pathBuffer[bufferBest-1] = pathCopy;
				scoreBuffer[bufferBest-1] = curTotalScore;
				lenBuffer[bufferBest-1] = curPathLength;
			}
		}
	}

	// list of list of pairs
	PyObject* rVal = PyList_New(0);
	Py_INCREF(rVal);

	i = 0;
	while ( i < bufferBest+1 )
	//while ( i < MAX_KEPT-1 )
	{
		// create this list and insert it
		PyObject* curList = PyList_New(0);
		Py_INCREF(curList);

		int j = 0;
		while ( j < smaller )
		{
			if ( pathBuffer[i][j].first != -1 )
			{
				int k = 0;
				while ( k++ < winSize )
				{
					//printf( "%d-%d\n", pathBuffer[i][j].first+k, pathBuffer[i][j].second+k );
					PyObject* curPair = Py_BuildValue( "[i,i]", pathBuffer[i][j].first+k, pathBuffer[i][j].second+k );
					Py_INCREF(curPair);
		
					PyList_Append(curList,curPair);
				}
			}
			j++;
		}
		PyList_Append(rVal, curList);
		i++;
	}

	free(bestPath);
	free(pathBuffer);

	return rVal;
}



/////////////////////////////////////////////////////////////////////////////
//
// Python specific setup
//
/////////////////////////////////////////////////////////////////////////////
// Method table handed to Py_InitModule: one entry per Python-visible
// function, closed by an all-zero sentinel entry.
static PyMethodDef CEMethods[] = {
	{
		"ccealign",                                      // name seen from Python
		ccealign_ccealign,                               // C implementation
		METH_VARARGS,                                    // called with a positional-args tuple
		"Align two proteins using the CE Algorithm."     // docstring
	},
	{ NULL, NULL, 0, NULL }
};

// Module initialisation hook: registers the "ccealign" module together with
// the functions listed in CEMethods.
PyMODINIT_FUNC
initccealign(void)
{
	// the value returned by Py_InitModule is deliberately discarded
	(void) Py_InitModule("ccealign", CEMethods);
}

// Stand-alone entry point: names the program after argv[0], starts the
// embedded interpreter, registers the ccealign module and exits.
int
main(int argc, char* argv[])
{
	(void) argc;	// only argv[0] is used

	Py_SetProgramName(argv[0]);
	Py_Initialize();
	initccealign();

	return EXIT_SUCCESS;
}
