/* This is an example HY-PHY Batch File.



   It reads in a '#' nucleotide dataset data/hiv.nuc and estimates

   maximum ln-likelihood based on the tree contained in the data file,

   using the General Reversible model with the parameters

   shared by all branches.

   

   Output is printed out as a Newick Style tree with branch lengths

   representing the number of expected substitutions per branch (which

   is the default setting for nucleotide models w/o rate variation).

   

   

   Sergei L. Kosakovsky Pond and Spencer V. Muse 

   December 1999. 

*/



/* 1. Read in the data and store the result in a DataSet variable.*/



/* Besides the sequences, the data file also carries the tree string that is
   used later in this script (via DATAFILE_TREE) to define the tree. */
DataSet 		nucleotideSequences = ReadDataFile ("data/hiv.nuc");

   

/* 2. Filter the data, specifying that all of the data is to be used

	  and that it is to be treated as nucleotides.*/

	  

/* The second argument (1) makes the filter read the alignment one character
   (i.e. one nucleotide) at a time. No site or sequence partition arguments
   are supplied, so nothing is excluded from the data set. */
DataSetFilter	filteredData = CreateFilter (nucleotideSequences,1);



/* 3. Collect observed nucleotide frequencies from the filtered data. observedFreqs will

	  store the vector of frequencies. */



/* observedFreqs is created by this call and filled from filteredData.
   NOTE(review): the three trailing 1s are presumably the unit size, the atom
   size and the position-specific flag, which for single nucleotides yields
   one frequency per nucleotide -- confirm against the HyPhy documentation. */
HarvestFrequencies (observedFreqs, filteredData, 1, 1, 1);



/* 4. Define the REV (general reversible) substitution matrix. '*' is defined to be -(sum of off-diag row elements).

	  AG,AT,CG,CT,GT are the shared parameters, representing the ratio
	  of corresponding substitution rates to the AC rate.
	  t is the "branch length". */



/* The five rate ratios are declared global so that a single value of each is
   shared by every branch of the tree instead of being estimated per branch.
   No initial values are assigned here. */
global AG;

global AT;

global CG;

global CT;

global GT;



/* 4x4 rate matrix. Judging by the parameter names, rows and columns are in
   the order A, C, G, T: the first row holds the A->C rate (plain t, the
   reference rate), then AG*t and AT*t. Each parameter appears at both (i,j)
   and (j,i), so the off-diagonal entries are symmetric. The diagonal '*'
   entries are placeholders, not explicit values. */
REVRateMatrix = 

		{{*,t,AG*t,AT*t}

		 {t,*,CG*t,CT*t}

		 {AG*t,CG*t,*,GT*t}

		 {AT*t,CT*t,GT*t,*}};

		 

/*5.  Define the REV model, by combining the substitution matrix with the vector of observed (equilibrium)

	  frequencies. */

	  

/* Pairs the rate matrix with the frequency vector harvested from the data.
   REV is the only model defined in this script, so it is also the "last
   defined model" that the tree definition picks up for its branches. */
Model REV	 = (REVRateMatrix, observedFreqs);



/*6.  Now we can define the tree variable, using the tree string read from the data file,

	  and, by default, assigning the last defined model (REV) to all tree branches. */

	  

/* DATAFILE_TREE is not assigned anywhere in this script; it refers to the
   tree string that came from the data file read at the top. This statement
   must come after the Model definition so the branches receive that model. */
Tree	givenTree = DATAFILE_TREE;



/*7.  Since all the likelihood function ingredients (data, tree, equilibrium frequencies)

	  have been defined we are ready to construct the likelihood function. */

	  

/* Only the data filter and the tree are passed in. The substitution model is
   not an explicit argument: it reaches the likelihood function through
   givenTree, whose branches were assigned the model when the tree was defined. */
LikelihoodFunction  theLnLik = (filteredData, givenTree);



/*8.  Maximize the likelihood function, storing parameter values in the matrix paramValues */



/* paramValues is created by this call. It is not read again in this script;
   the fitted results are reported through theLnLik instead. */
Optimize (paramValues, theLnLik);



/*9.  Print the tree with optimal branch lengths to the console. */



/* The likelihood function object itself is written to stdout. Per the file
   header, the result is a Newick-style tree whose branch lengths are the
   expected numbers of substitutions per branch. */
fprintf  (stdout, theLnLik);

		 

   

