
function TreeMatrix2TreeString (levelIndex, doLengths)
{
	/*
		Serialize the global treeNodes matrix into a Newick-style string.

		Each row of treeNodes describes one node; rows are read top to bottom
		until a row whose depth entry is 0 is reached.
			treeNodes[row][levelIndex]   : node index; values below _NUMBER_OF_SEQUENCES
			                               are looked up as sequence names in INFERENCE_DATA_SET
			treeNodes[row][levelIndex+1] : depth of the node (0 terminates the scan)
			treeNodes[row][levelIndex+2] : value appended after ':' when doLengths > 0.5

		Returns the assembled string (also left in the global treeString).
		NOTE: variables used here (treeString, _topRecP, k, m, n, j, nodeName) are
		ordinary HBL globals and are overwritten by every call.
	*/
	treeString = "";
	_topRecP   = 0;		/* depth of the previously emitted node */
	k          = 0;		/* current row of treeNodes */
	n          = treeNodes[0][levelIndex];
	m          = treeNodes[0][levelIndex+1];

	while (m)
	{
		if (m == _topRecP)
		{
			/* same depth as the previous node: plain sibling */
			treeString = treeString+",";
		}
		else
		{
			if (m > _topRecP)
			{
				/* going deeper: separate from the previous sibling (if any), then open clades */
				if (_topRecP)
				{
					treeString = treeString+",";
				}
				for (j=_topRecP; j<m; j=j+1)
				{
					treeString = treeString+"(";
				}
			}
			else
			{
				/* coming back up: close one clade per level */
				for (j=m; j<_topRecP; j=j+1)
				{
					treeString = treeString+")";
				}
			}
		}

		/* only nodes with index below _NUMBER_OF_SEQUENCES get a name */
		if (n<_NUMBER_OF_SEQUENCES)
		{
			GetString (nodeName, INFERENCE_DATA_SET, n);
			treeString = treeString+nodeName;
		}

		if (doLengths>.5)
		{
			treeString = treeString+":"+treeNodes[k][levelIndex+2];
		}

		/* advance to the next row, remembering the depth just processed */
		_topRecP = m;
		k = k+1;
		m = treeNodes[k][levelIndex+1];
		n = treeNodes[k][levelIndex];
	}

	/* m is 0 here; close every clade that is still open */
	for (j=m; j<_topRecP; j=j+1)
	{
		treeString = treeString+")";
	}

	return treeString;
}

/* Ask how shared (global) model parameters are handled during the search:
   option 0 re-estimates them for every tree, option 1 estimates them once
   on a neighbor joining tree and holds them constant afterwards. */
ChoiceList (globalParametersOption,"Shared Parameters",1,SKIP_NONE,
			"Estimate every time","Estimate all model parameters for each tree.",
			"Estimates based on NJ tree","Start with a neighbor joining tree, estimate all global parameters based on it, then hold them constant for the rest of the search.");

/* a negative selection aborts the script (presumably the user cancelled the dialog) */
if (globalParametersOption<0)
{
	return;
}

if (globalParametersOption == 1)
{
	/* Build a distance-based starting tree, fit the model on it, and record the
	   fitted global parameter values as constraints in _cString. */
	DataSetFilter filteredData = CreateFilter (INFERENCE_DATA_SET,1,"","");
	/* NOTE: methodIndex is reused further down in the file for the
	   "Starting 3 taxa tree" choice; here it selects negative branch length handling. */
	ChoiceList (methodIndex,"Negative Branch Lengths",1,SKIP_NONE,
				"Keep Negative","Negative Branch Lengths are Allowed.",
				"Force Zero","Negative Branch Lengths are Forced to 0.");			
	
	if (methodIndex < 0)
	{
		return 0;
	}
	
	
	distanceMatrix = {_NUMBER_OF_SEQUENCES,_NUMBER_OF_SEQUENCES};
	
	/* load the distance formula chooser; DISTANCE_PROMPTS is raised only around
	   InitializeDistances (presumably to enable its interactive prompts - confirm
	   against chooseDistanceFormula.def) */
	DISTANCE_PROMPTS = 1;
	incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"chooseDistanceFormula.def";
	ExecuteCommands  ("#include \""+incFileName+"\";");
	InitializeDistances (0);
	DISTANCE_PROMPTS = 0;
	
	/* fill only the upper triangle (i<j) of the pairwise distance matrix */
	for (i = 0; i<_NUMBER_OF_SEQUENCES; i=i+1)
	{
		for (j = i+1; j<_NUMBER_OF_SEQUENCES; j = j+1)
		{
			distanceMatrix[i][j] = ComputeDistanceFormula (i,j);
		}
	}
	
	MESSAGE_LOGGING 		 	= 1;
	cladesMade 					= 1;
	
	
	/* treeNodes rows are {node index, depth, branch length}; a row with depth 0
	   terminates the list (see TreeMatrix2TreeString). */
	if (_NUMBER_OF_SEQUENCES == 2)
	{
		/* two taxa: split the single distance evenly between the two branches */
		d1 = distanceMatrix[0][1]/2;
		treeNodes = {{0,1,d1__},
					 {1,1,d1__},
					 {2,0,0}};
					 
		cladesInfo = {{2,0}};
	}
	else
	{
		if (_NUMBER_OF_SEQUENCES == 3)
		{
			/* three taxa: branch lengths of the star tree solved directly from
			   the three pairwise distances */
			d1 = (distanceMatrix[0][1]+distanceMatrix[0][2]-distanceMatrix[1][2])/2;
			d2 = (distanceMatrix[0][1]-distanceMatrix[0][2]+distanceMatrix[1][2])/2;
			d3 = (distanceMatrix[1][2]+distanceMatrix[0][2]-distanceMatrix[0][1])/2;
			/* NOTE(review): unlike the two-taxon literal above, there is no comma
			   between the last two rows below - confirm the matrix parser treats
			   both row-separator forms the same way. */
			treeNodes = {{0,1,d1__},
						 {1,1,d2__},
						 {2,1,d3__}
						 {3,0,0}};
						 
			cladesInfo = {{3,0}};		
		}
		else
		{	
			/* presumably HyPhy's built-in neighbor joining operator applied to the
			   distance matrix, with methodIndex choosing the negative branch length
			   handling - confirm against the HBL matrix operator reference */
			njm = (distanceMatrix > methodIndex)>=_NUMBER_OF_SEQUENCES;
				
			treeNodes 		= {2*(_NUMBER_OF_SEQUENCES+1),3};
			cladesInfo	    = {_NUMBER_OF_SEQUENCES-1,2};
			
			/* columns 0-2 of njm become the treeNodes rows */
			for (i=Rows(treeNodes)-1; i>=0; i=i-1)
			{
				treeNodes[i][0] = njm[i][0];
				treeNodes[i][1] = njm[i][1];
				treeNodes[i][2] = njm[i][2];
			}
	
			/* columns 3-4 of njm become the cladesInfo rows */
			for (i=Rows(cladesInfo)-1; i>=0; i=i-1)
			{
				cladesInfo[i][0] = njm[i][3];
				cladesInfo[i][1] = njm[i][4];
			}
			
			/* drop the reference to the intermediate matrix */
			njm = 0;
		}
	}
	
	distanceMatrix = 0;
	
	/* now with the treeNodes matrix ready we can convert it into a Newick string */
	
	treeString = TreeMatrix2TreeString (0,1);
	UseModel (USE_NO_MODEL);
	Tree Inferred_Tree = treeString; 
	
	/* index vector 0..N-1: every sequence takes part in this likelihood function */
	_all_sequence_matrix = {_NUMBER_OF_SEQUENCES,1};
	
	for (i=0; i<_NUMBER_OF_SEQUENCES; i=i+1)
	{
		_all_sequence_matrix [i][0] = i;
	}
	
	SpawnLikelihoodFunction ("_INF_LF_", "Inferred_Tree", INFERENCE_DATA_WINDOW, _all_sequence_matrix);
	Optimize (res,_INF_LF_);
	/* fetch the likelihood function's variable listing and keep the
	   "Global Independent" entry */
	GetString (_gvc,_INF_LF_,-1);
	_gvc = _gvc ["Global Independent"];
	/* _cString accumulates one "name:=value;" statement per global parameter;
	   InferTreeTopology executes it after spawning each likelihood function */
	_cString = "";
	for (i=0; i<Columns (_gvc); i=i+1)
	{
		/* evaluate the variable by name to obtain its current value in cv */
		ExecuteCommands ("cv = " + _gvc[i]);
		_cString = _cString + (_gvc[i] + ":=" + cv +";");
	}
}

#include "Support/branchSwappingFunctions.bf";

/* Interactive configuration of the sequential addition search.
   Each ChoiceList stores the zero-based index of the selected entry; a negative
   value ends the script (presumably the user cancelled the dialog). */
MESSAGE_LOGGING = 0;
twHasBeenOpened = 0;

/* randomOption: 0 = add sequences in file order, 1 = random addition order */
ChoiceList (randomOption,"Addition Order",1,SKIP_NONE,
			"Given Order","The sequences will be added in the order they appear in the data file.",
			"Random Order","The order of addition will be random.");


if (randomOption<0)
{
	return;
}

/* doNNIOption: 0 = none, 1 = NNI after additions, 2 = SPR after additions,
   3 = NNI on the final tree, 4 = SPR on the final tree, 5 = NNI during + SPR at the end */
ChoiceList (doNNIOption,"Branch Swapping",1,SKIP_NONE,
			"No Swapping","No branch swapping is performed.",
			"Complete NNI","Nearest neighbor interchange is performed after EACH sequence is added. Order (sequences)^2 additional trees are examined.",
			"Complete SPR","Subtree pruning and regrafting is performed after EACH sequence is added. Order (sequences)^3 additional trees are examined.",
			"Global NNI","Nearest neighbor interchange is performed after ALL the sequences have been added. Order (sequences)^1 additional trees are examined.",
			"Global SPR","Subtree pruning and regrafting is performed after ALL the sequences have been added. Order (sequences)^2 additional trees are examined.",
			"NNI+SPR","Nearest neighbor interchange is performed after EACH sequence is added. Subtree pruning and regrafting performed on the final tree. Order (sequences)^2 additional trees are examined.");


if (doNNIOption<0)
{
	return;
}

if (doNNIOption == 1)
{
	/* "Complete NNI": keep prompting until a positive NNI period is entered */
	nniPeriod = 0;
	while (nniPeriod <= 0)
	{
		fprintf (stdout, "\nDo NNI every time this many branches are added (>=1):");
		fscanf  (stdin, "Number", nniPeriod);
	}
}

/* methodIndex: 0 = first 3 taxa, 1 = user-chosen, 2 = best by likelihood, 3 = random.
   Options 2 and 3 are resolved later inside InferTreeTopology. */
ChoiceList (methodIndex,"Starting 3 taxa tree",1,SKIP_NONE,
			"First 3","The starting 3 taxa tree will comprise first 3 taxa from the data file.",
			"Choose 3","User selects 3 taxa for the starting 3 taxa tree.",
			"Best 3","The starting 3 taxa tree will be chosen by selecting the ML estimation among all possible 3 taxa trees. Warning: there are O(n^3) 3 taxa trees for n sequences.",
			"Random","Select 3 random starting sequences.");

/* begin by selecting the best 3-taxa tree */
if (methodIndex<0)
{
	return;
}

/* sequence indices of the three taxa that form the starting tree */
first3Taxa = {3,1};

if (methodIndex == 0)
{
	first3Taxa[0]=0;
	first3Taxa[1]=1;
	first3Taxa[2]=2;
}

if (methodIndex == 1)
{
	/* let the user pick exactly 3 sequences from the data set */
	ChoiceList (first3Taxa, "Choose 3 taxa for the starting tree:",3,SKIP_NONE,INFERENCE_DATA_SET);
	if (first3Taxa[0]<0)
	{
		return ;
	}
}



function InferTreeTopology (verbFlag)
{
	/*
		Sequential (stepwise) addition search for a tree topology.

		Starting from a 3 taxa tree, the remaining sequences are added one at a
		time; each new sequence is tried on every branch of the current tree and
		the placement with the highest optimized log likelihood is kept.
		Depending on doNNIOption, branch swapping (Support/doNNISwap.bf or
		Support/doSPRSwap.bf) is run after additions.

		verbFlag : when non-zero, progress is reported on stdout.

		Returns 0 when _NUMBER_OF_SEQUENCES <= 3 (nothing to infer), 1 otherwise.

		Globals read    : _NUMBER_OF_SEQUENCES, randomOption, methodIndex, first3Taxa,
		                  doNNIOption, nniPeriod, globalParametersOption, _cString,
		                  INFERENCE_DATA_SET, INFERENCE_DATA_WINDOW, OPTIMIZATION_PRECISION
		Globals written : bestTree, bestValue, bestTreeNodes, bestCladesInfo, treeNodes,
		                  cladesInfo, additionOrder, first3Taxa, _subset_to_filter and the
		                  scratch variables used below (HBL functions share the global scope).

		treeNodes layout   : columns 0/1 hold {node index, depth} of the current tree,
		                     columns 2/3 hold the same for the candidate tree being tested.
		cladesInfo layout  : columns 0/1 hold {first row, row count} of each internal
		                     clade in the current tree, columns 2/3 for the candidate.

		VERBOSITY_LEVEL and USE_LAST_RESULTS are changed for the duration of the
		search and restored on every exit path.
	*/
	saveVL			= VERBOSITY_LEVEL;
	VERBOSITY_LEVEL = -1;
	if (_NUMBER_OF_SEQUENCES <= 3)
	{
		fprintf (stdout, "Input file contained 3 or fewer sequences - not much inference to be done!\n");
		/* restore the caller's verbosity before bailing out; previously this
		   early exit left VERBOSITY_LEVEL stuck at -1 */
		VERBOSITY_LEVEL = saveVL;
		return 0;
	}
	if (randomOption==1)
	/* randomize sequence order */
	{
		/* treeNodes receives the shuffled order; cladesInfo tracks which
		   sequence indices are still available (-1 = already drawn) */
		treeNodes  = {_NUMBER_OF_SEQUENCES,1};
		cladesInfo = {_NUMBER_OF_SEQUENCES,1};
		
		for (currentTaxon=0; currentTaxon < _NUMBER_OF_SEQUENCES; currentTaxon = currentTaxon+1)
		{
			cladesInfo [currentTaxon] = currentTaxon;	
		}
		
		currentTaxon = _NUMBER_OF_SEQUENCES-1;
		
		while (currentTaxon>=0)
		{
			/* draw the _topRecI-th still-available sequence */
			_topRecI = Random (0,currentTaxon+1) $ 1;
			shift = 0;
			for (_topRecS = 0; _topRecS<_NUMBER_OF_SEQUENCES; _topRecS=_topRecS+1)
			{
				if (cladesInfo[_topRecS]>=0)
				{
					shift = shift+1;
				}
				if (_topRecI == shift - 1)
				{
					treeNodes[_NUMBER_OF_SEQUENCES-1-currentTaxon] = cladesInfo[_topRecS];
					cladesInfo [_topRecS] = -1;
					break;
				}
			}
			currentTaxon = currentTaxon - 1;
		}
		
		/*reshuffledOrder = "";
		for (currentTaxon = 0; currentTaxon < _NUMBER_OF_SEQUENCES-1; currentTaxon = currentTaxon+1)
		{
			reshuffledOrder = reshuffledOrder + treeNodes[currentTaxon] + ",";
		}
		reshuffledOrder = reshuffledOrder + treeNodes[currentTaxon];*/
		
		if (verbFlag)
		{
			fprintf (stdout,"\n\t Sequence order randomized as follows:\n");
			for (currentTaxon = 0; currentTaxon < _NUMBER_OF_SEQUENCES; currentTaxon = currentTaxon+1)
			{
				GetString (nodeName, INFERENCE_DATA_SET, treeNodes[currentTaxon]);
				fprintf (stdout,nodeName,"\n");
			}
			fprintf (stdout,"\n");
		}
		
				
		additionOrder = treeNodes;
		if (methodIndex == 0)
		{
			/* "First 3" under random order means the first three of the shuffled list */
			first3Taxa[0] = additionOrder[0];
			first3Taxa[1] = additionOrder[1];
			first3Taxa[2] = additionOrder[2];
		}
	}
	else
	{
		/* given order: 0, 1, ..., N-1 */
		additionOrder = {_NUMBER_OF_SEQUENCES,1};
		for (currentTaxon = 0; currentTaxon < _NUMBER_OF_SEQUENCES; currentTaxon = currentTaxon+1)
		{
			additionOrder [currentTaxon] = currentTaxon;
		}
	}

	treeNodes 		= {2*(_NUMBER_OF_SEQUENCES+1),4};
	bestTreeNodes 	= {2*(_NUMBER_OF_SEQUENCES+1),2};

	cladesInfo 	    = {_NUMBER_OF_SEQUENCES,4};
	bestCladesInfo  = {_NUMBER_OF_SEQUENCES,2};

	currentTaxon = 0;

	/* initial 3 taxa star tree: three leaves at depth 1 followed by the
	   terminating root row (depth 0) */
	treeNodes[0][0]=0;
	treeNodes[0][1]=1;
	treeNodes[1][0]=1;
	treeNodes[1][1]=1;
	treeNodes[2][0]=2;
	treeNodes[2][1]=1;
	treeNodes[3][0]=_NUMBER_OF_SEQUENCES;
	treeNodes[3][1]=0;
	cladesInfo[0][0]=0;
	cladesInfo[0][1]=4;

	bestTree="";
	bestValue=-1e20;


	/* save USE_LAST_RESULTS; it is restored just before the final return */
	sulr = USE_LAST_RESULTS;

	USE_LAST_RESULTS = 1;

	if (methodIndex == 2)
	{
		/* "Best 3": exhaustively score every unordered triple of sequences */
		if (verbFlag)
		{
			fprintf (stdout, "\nSTEP 0). Searching for the best 3 taxa tree.");
			fprintf (stdout, "\n\t", Format (_NUMBER_OF_SEQUENCES*(_NUMBER_OF_SEQUENCES-1)*(_NUMBER_OF_SEQUENCES-2)/6,0,0)," trees will be examined.\n");
		}
		for (_topRecI=0; _topRecI<_NUMBER_OF_SEQUENCES-2; _topRecI=_topRecI+1)
		{
			for (_topRecS=_topRecI+1;_topRecS<_NUMBER_OF_SEQUENCES-1;_topRecS=_topRecS+1)
			{
				for (shift=_topRecS+1;shift<_NUMBER_OF_SEQUENCES; shift=shift+1)
				{
					treeNodes[0][0]=_topRecI;
					treeNodes[0][1]=1;
					treeNodes[1][0]=_topRecS;
					treeNodes[1][1]=1;
					treeNodes[2][0]=shift;
					treeNodes[2][1]=1;		
					thisTree = TreeMatrix2TreeString (0,0);
					Tree    Inferred_Tree = thisTree;
					
					_subset_to_filter = {{_topRecI}{_topRecS}{shift}};
					
					SpawnLikelihoodFunction ("_INF_LF_", "Inferred_Tree", INFERENCE_DATA_WINDOW, _subset_to_filter);
					/* re-apply the fixed global parameter values, if any were recorded */
					if (globalParametersOption && Abs(_cString))
					{	
						ExecuteCommands (_cString);	
					}
					Optimize (res,_INF_LF_);
					
					/* res[1][0] is compared as the optimized log likelihood */
					if (res[1][0]>bestValue+OPTIMIZATION_PRECISION)
					{
						first3Taxa[0]=_topRecI;
						first3Taxa[1]=_topRecS;
						first3Taxa[2]=shift;
						
						bestValue = res[1][0];
						bestTree = thisTree;
					}
				}
			}
		}

		if (verbFlag)
		{
			fprintf (stdout,"\n\t Best 3 taxa tree is ", bestTree, " with log likelihood of ", bestValue);
		}
	}
	else
	{
		if (methodIndex == 3)
		{
			/* "Random": draw three distinct sequence indices by rejection */
			first3Taxa[0] = Random (0,_NUMBER_OF_SEQUENCES) $ 1;
			first3Taxa[1] = first3Taxa[0];
			while (first3Taxa[1] == first3Taxa[0])
			{
				first3Taxa[1] = Random (0,_NUMBER_OF_SEQUENCES) $ 1;
			}
			first3Taxa[2] = first3Taxa[0];
			while ((first3Taxa[2] == first3Taxa[0])||(first3Taxa[2] == first3Taxa[1]))
			{
				first3Taxa[2] = Random (0,_NUMBER_OF_SEQUENCES) $ 1;
			}
			if (verbFlag)
			{
				fprintf (stdout,"\n\t Random 3 taxa tree selected: (");
				for (currentTaxon = 0; currentTaxon < 3; currentTaxon = currentTaxon+1)
				{
					GetString (nodeName, INFERENCE_DATA_SET, first3Taxa[currentTaxon]);
					fprintf (stdout,nodeName);
					if (currentTaxon<2)
					{
						fprintf (stdout, ",");
					}
				}
				fprintf (stdout,")\n");
			}
		}
	}

	/* seed the current tree with the chosen three taxa */
	treeNodes[0][0]=first3Taxa[0];
	treeNodes[1][0]=first3Taxa[1];
	treeNodes[2][0]=first3Taxa[2];

	currentTaxon = 0;
	taxonCounter = 3;		/* number of taxa currently in the tree */
	
	_subset_to_filter = {{first3Taxa[0]}{first3Taxa[1]}{first3Taxa[2]}};

	while (currentTaxon<_NUMBER_OF_SEQUENCES)
	{
		taxonIndex = additionOrder [currentTaxon];
		/* decide which taxon will be added next */
		if ((taxonIndex!=first3Taxa[0])&&(taxonIndex!=first3Taxa[1])&&(taxonIndex!=first3Taxa[2]))
		{
			GetString (nodeName, INFERENCE_DATA_SET, taxonIndex);
			
			if (verbFlag)
			{
				fprintf (stdout, "\n\n\nSTEP ",Format(taxonCounter-2,0,0),"/",Format(_NUMBER_OF_SEQUENCES-3,0,0),"). Adding taxon:", nodeName);
			}
			/* attempt to insert this taxon into current tree */
			/* produce a new data set filter */
			
			/* grow _subset_to_filter by one row and append the new taxon */
			_swp_mx = _subset_to_filter;
			
			_subset_to_filter = {Rows (_swp_mx)+1,1};
			
			for (mx_counter = 0; mx_counter < Rows (_swp_mx); mx_counter = mx_counter + 1)
			{	
				_subset_to_filter [mx_counter] = _swp_mx [mx_counter];
			}
			
			_subset_to_filter [mx_counter] = taxonIndex;

			bestValue=-1e20;
			for (t_node=0; t_node<2*taxonCounter-3; t_node=t_node+1)
			/* loop over all branches */
			{	
				/* Build the candidate tree in columns 2/3 by copying the current
				   tree (columns 0/1) and splicing the new taxon onto row t_node.
				   Rows after the insertion point are shifted down by 2. */
				_topRecI = 0;
				shift = 0;
				while (treeNodes[_topRecI][1])
				{
					if (_topRecI==t_node)
					{
						currentLevel = taxonCounter-3;
						shift = 2;
						if (treeNodes[_topRecI][0]<_NUMBER_OF_SEQUENCES)
						/* simple branch */
						{
							/* leaf: push it one level down, add the new taxon as its
							   sibling and a new internal node at the old depth */
							treeNodes[_topRecI][2]=treeNodes[_topRecI][0];
							treeNodes[_topRecI][3]=treeNodes[_topRecI][1]+1;
							treeNodes[_topRecI+1][2]=taxonIndex;
							treeNodes[_topRecI+1][3]=treeNodes[_topRecI][1]+1;
							treeNodes[_topRecI+2][2]= taxonCounter+ _NUMBER_OF_SEQUENCES-3;
							treeNodes[_topRecI+2][3]=treeNodes[_topRecI][1];
							cladesInfo[currentLevel][2] = _topRecI;
							cladesInfo[currentLevel][3] = 3;					
						}
						else
						{
							/* update node depths for the entire clade now*/
							_topRecL = treeNodes[_topRecI][0]-_NUMBER_OF_SEQUENCES;
							_topRecS = cladesInfo[_topRecL][0];
							/*fprintf (stdout,"\n",_topRecI," ",cladesInfo[_topRecL][1],treeNodes, cladesInfo,"\n");*/
							for (_topRecP=_topRecS+cladesInfo[_topRecL][1]-1; _topRecP>=_topRecS; _topRecP=_topRecP-1)
							{
								treeNodes[_topRecI][2]=treeNodes[_topRecI][0];
								treeNodes[_topRecI][3]=treeNodes[_topRecI][1]+1;						
								_topRecI=_topRecI-1;
							}
							/* the loop walked _topRecI back over the clade; return it
							   to the clade's last row */
							_topRecI=_topRecI+cladesInfo[_topRecL][1];
							/* new clade record */
							cladesInfo[currentLevel][2] = cladesInfo[_topRecL][0];
							cladesInfo[currentLevel][3] = cladesInfo[_topRecL][1]+2;
							/* now we need to insert two more nodes */
							treeNodes[_topRecI+1][2]=taxonIndex;
							treeNodes[_topRecI+1][3]=treeNodes[_topRecI][1]+1;
							treeNodes[_topRecI+2][2]=currentLevel+_NUMBER_OF_SEQUENCES;
							treeNodes[_topRecI+2][3]=treeNodes[_topRecI][1];
						}
						/* adjust every existing clade record for the two inserted rows:
						   clades starting after the insertion point move down by 2,
						   clades spanning it grow by 2 */
						for (_topRecP=0; _topRecP<currentLevel; _topRecP=_topRecP+1)
						{
							if (cladesInfo[_topRecP][0]>_topRecI)
							{
								cladesInfo[_topRecP][2] = cladesInfo[_topRecP][0]+2;
							}
							else
							{
								cladesInfo[_topRecP][2] = cladesInfo[_topRecP][0];
							}
							
							if ((cladesInfo[_topRecP][0]<=_topRecI)&&((cladesInfo[_topRecP][0]+cladesInfo[_topRecP][1])>_topRecI+1))
							{
								cladesInfo[_topRecP][3] = cladesInfo[_topRecP][1]+2;
							}
							else
							{
								cladesInfo[_topRecP][3] = cladesInfo[_topRecP][1];
							}
						}				
					}
					else
					{
						/* untouched row: copy, offset by 2 once past the insertion */
						treeNodes[_topRecI+shift][2]=treeNodes[_topRecI][0];
						treeNodes[_topRecI+shift][3]=treeNodes[_topRecI][1];
					}
					_topRecI = _topRecI+1;
				}
				
				/* copy the terminating (depth 0) row as well */
				treeNodes[_topRecI+2][2]=treeNodes[_topRecI][0];
				treeNodes[_topRecI+2][3]=treeNodes[_topRecI][1];
				
				thisTree = TreeMatrix2TreeString (2,0);
				
				if (verbFlag)
				{
					fprintf (stdout, "\n\tTree ",thisTree);
				}
				Tree    Inferred_Tree = thisTree;

				SpawnLikelihoodFunction ("_INF_LF_", "Inferred_Tree", INFERENCE_DATA_WINDOW, _subset_to_filter);
				if (globalParametersOption && Abs(_cString))
				{	
					ExecuteCommands (_cString);	
				}
				Optimize (res,_INF_LF_);

				if (res[1][0]>bestValue+OPTIMIZATION_PRECISION)
				{
					/* remember the best candidate seen so far for this taxon */
					bestValue = res[1][0];
					bestTree = thisTree;
					for (_topRecL=2*taxonCounter-2;_topRecL>=0;_topRecL=_topRecL-1)
					{
						bestTreeNodes[_topRecL][0]=treeNodes[_topRecL][2];
						bestTreeNodes[_topRecL][1]=treeNodes[_topRecL][3];
					}
					for (_topRecL=0; _topRecL<taxonCounter-2; _topRecL=_topRecL+1)
					{
						bestCladesInfo[_topRecL][0]=cladesInfo[_topRecL][2];
						bestCladesInfo[_topRecL][1]=cladesInfo[_topRecL][3];
					}
				}
				if (verbFlag)
				{
					fprintf (stdout, " => ", res[1][0]);
				}
			}
			/* promote the best candidate to be the current tree (columns 0/1) */
			for (_topRecL=2*taxonCounter-2;_topRecL>=0;_topRecL=_topRecL-1)
			{
				treeNodes[_topRecL][0]=bestTreeNodes[_topRecL][0];
				treeNodes[_topRecL][1]=bestTreeNodes[_topRecL][1];
			}
			for (_topRecL=0; _topRecL<taxonCounter-2; _topRecL=_topRecL+1)
			{
				cladesInfo[_topRecL][0]=bestCladesInfo[_topRecL][0];
				cladesInfo[_topRecL][1]=bestCladesInfo[_topRecL][1];
			}
			if (verbFlag)
			{
				fprintf (stdout,"\n\n\t Best ", Format(taxonCounter+1,0,0)," taxa tree is ", bestTree, " with log likelihood of ", bestValue);
			}
			/* decide whether branch swapping runs after this addition, based on
			   doNNIOption and on whether this was the last taxon to be added */
			if (((doNNIOption<=2)||((doNNIOption>2)&&(taxonCounter==_NUMBER_OF_SEQUENCES-1)))&&(taxonCounter>3)&&(doNNIOption)||(doNNIOption==5))
			{
				starDecomposition = 0;
				if ((doNNIOption==1)||(doNNIOption==3)||((doNNIOption==5)&&(taxonCounter<_NUMBER_OF_SEQUENCES-1)))
				/* NNI */
				{
					/* for "Complete NNI" skip the swap unless this addition falls on
					   the nniPeriod boundary or is the final one */
					if (((doNNIOption==1 && (taxonCounter-2)%nniPeriod && taxonCounter<_NUMBER_OF_SEQUENCES-1))==0)
					{
						#include "Support/doNNISwap.bf";
					}
				}
				else
				/* SPR */
				{
					#include "Support/doSPRSwap.bf";
				}
			}
			taxonCounter = taxonCounter+1;
		}
		currentTaxon = currentTaxon+1;
	}
	/* restore the engine settings saved on entry */
	USE_LAST_RESULTS	= sulr;
	VERBOSITY_LEVEL		= saveVL;
	return 1.0;
}

/* Run the search with progress output enabled; the return value is 0 when
   there were 3 or fewer sequences and nothing was inferred. */
_topRecL = InferTreeTopology (1.0);

if (_topRecL)
{
	/* rebuild the tree and likelihood function for the best topology found,
	   using the sequence subset left in _subset_to_filter by the search */
	Tree	Inferred_Tree = bestTree;
	SpawnLikelihoodFunction ("_INF_LF_", "Inferred_Tree", INFERENCE_DATA_WINDOW, _subset_to_filter);

	/* keep a 3-column copy of the best node table; only columns 0 and 1 are
	   filled here, column 2 is left at its initial value */
	saveTreeNodes = {2*(_NUMBER_OF_SEQUENCES+1),3};
	for (_topRecI=2*_NUMBER_OF_SEQUENCES+1;_topRecI>=0;_topRecI=_topRecI-1)
	{
		saveTreeNodes[_topRecI][0] = bestTreeNodes[_topRecI][0];
		saveTreeNodes[_topRecI][1] = bestTreeNodes[_topRecI][1];
	}
}


