import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.picard.util.SamLocusIterator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
public class ExonCoverage5 {
	// Command line keys. Every valued option is passed as <key><separator><value>,
	// e.g. bamPath=/home/my_bams/ ; flags ("help", "extendedFmt") are passed bare.
	private static final String bamPathKey = "bamPath";
	private static final String gffKey = "gffFile";
	private static final String outPathKey = "outDirPath";
	private static final String separator = "=";
	private static final String minCovKey = "minCov";
	private static final String chromListKey = "chromosomeList";
	private static final String bamListKey = "bamFileList";
	private static final String outFormatKey = "extendedFmt";
	private static final String lostCutoffKey = "lostCutoff";
	private static final String covCatKey = "covCats";
	// Usage text printed on "help", on too few arguments, and whenever a
	// required option is missing. The defaults quoted in brackets mirror the
	// fallbacks applied in main().
	private static final String help = "Command line options:\n" +
			"Required:\n" +
			"bamPath - path to your bam file/files, has to end with / or \\ bamPath=/home/my_bams/\n" +
			"bamFileList  a single .bam file or a comma separated list, only file names, bam and corresponding .bai files have to be in a directory provided in bamPath bamFileList=bam1.bam,bam2.bam\n" +
			"gffFile  location of gff3 file gffFile=/home/my_gffs/annot.gff3\n" +
			"outDirPath  location output directory, has to end with / or \\ outDirPath=/home/my_results/\n" +
			"Optional\n" +
			"minCov  minimal coverage threshold to consider position covered [minCov=1]\n" +
			"chromosomeList  comma separated list of chromosomes to be used for analysis, use all, for all chromosomes [chromosomeList=all]\n" +
			"lostCutoff  coverage cutoff to consider gene as lost for calculating stats [lostCutoff=0.0]\n" +
			"covCats  coverage categories for visualization [covCats=0,10,20,40,70]\n" +
			"extendedFmt  used extended format, additional info included in output files [regular format] \n" +
			"\n" +
			"Please make sure that all your supplied paths end with / or \\\n" +
			"If memory consumption is a problem please consider splitting your .bam files\n" +
			"Sample commands:\n" +
			"java -Xmx4g -jar SGSGeneLoss.jar bamPath=/home/uqagnieszka/bams/ bamFileList=arabidopsis.sorted.bam gffFile=/home/gff_files/Athaliana_167_gene_exons.gff3 outDirPath=/home/uqagnieszka/results/ chromosomeList=all\n" +
			"java -Xmx4g -jar SGSGeneLoss.jar bamPath=/home/uqagnieszka/bams/ bamFileList=arabidopsis.sorted.bam,arabidopsis2.sorted.bam gffFile=/home/gff_files/Athaliana_167_gene_exons.gff3 outDirPath=/home/uqagnieszka/results/ chromosomeList=Chr1,Chr2 minCov=2 lostCutoff=0.05 covCats=0,2,5,10,20 extendedFmt\n" +
			"To see help run: java -Xmx4g -jar SGSGeneLoss.jar help";
	
	/**
	 * Looks for the bare "help" flag among the command line arguments.
	 *
	 * @param cmd command line arguments
	 * @return the usage text when "help" is present, otherwise null
	 */
	public static String getHelp(String[] cmd) {
		int idx = 0;
		while (idx < cmd.length) {
			if (cmd[idx].equals("help")) {
				return help;
			}
			idx++;
		}
		return null;
	}
	/**
	 * Tells whether the bare "extendedFmt" flag was given on the command line.
	 *
	 * @param cmd command line arguments
	 * @return true when one argument is exactly "extendedFmt"
	 */
	public static boolean getOutFmt(String[] cmd) {
		boolean extended = false;
		int idx = 0;
		// stop at the first exact match, like an early return would
		while (!extended && idx < cmd.length) {
			extended = cmd[idx].equals("extendedFmt");
			idx++;
		}
		return extended;
	}
	/**
	 * Extracts the value of the first bamFileList=... argument and echoes it
	 * to standard output.
	 *
	 * @param cmd command line arguments
	 * @return the text after the first separator, or null when the option is absent
	 */
	public static String getBamFileList(String[] cmd) {
		final String prefix = bamListKey + separator;
		for (int idx = 0; idx < cmd.length; idx++) {
			String arg = cmd[idx];
			if (!arg.startsWith(prefix)) {
				continue;
			}
			String value = arg.substring(arg.indexOf(separator) + 1);
			System.out.println(value);
			return value;
		}
		return null;
	}
	/**
	 * Extracts the value of the first chromosomeList=... argument and echoes
	 * it to standard output.
	 *
	 * @param cmd command line arguments
	 * @return the text after the first separator, or null when the option is absent
	 */
	public static String getChromList(String[] cmd) {
		final String prefix = chromListKey + separator;
		for (int idx = 0; idx < cmd.length; idx++) {
			String arg = cmd[idx];
			if (!arg.startsWith(prefix)) {
				continue;
			}
			String value = arg.substring(arg.indexOf(separator) + 1);
			System.out.println(value);
			return value;
		}
		return null;
	}
	/**
	 * Extracts the value of the first covCats=... argument and echoes it to
	 * standard output.
	 *
	 * @param cmd command line arguments
	 * @return the text after the first separator, or null when the option is absent
	 */
	public static String getCovCat(String[] cmd) {
		final String prefix = covCatKey + separator;
		for (int idx = 0; idx < cmd.length; idx++) {
			String arg = cmd[idx];
			if (!arg.startsWith(prefix)) {
				continue;
			}
			String value = arg.substring(arg.indexOf(separator) + 1);
			System.out.println(value);
			return value;
		}
		return null;
	}
	/**
	 * Extracts the value of the first bamPath=... argument and echoes it to
	 * standard output.
	 *
	 * @param cmd command line arguments
	 * @return the text after the first separator, or null when the option is absent
	 */
	public static String getBamPath(String[] cmd) {
		final String prefix = bamPathKey + separator;
		for (int idx = 0; idx < cmd.length; idx++) {
			String arg = cmd[idx];
			if (!arg.startsWith(prefix)) {
				continue;
			}
			String value = arg.substring(arg.indexOf(separator) + 1);
			System.out.println(value);
			return value;
		}
		return null;
	}
	/**
	 * Reads the first minCov=... argument as an integer and echoes the parsed
	 * value to standard output.
	 *
	 * @param cmd command line arguments
	 * @return the parsed value, or null when the option is absent or its
	 *         value is not an integer
	 */
	public static Integer getMinCov(String[] cmd) {
		final String prefix = minCovKey + separator;
		for (String arg : cmd) {
			if (!arg.startsWith(prefix)) {
				continue;
			}
			final Integer parsed;
			try {
				parsed = Integer.valueOf(arg.substring(arg.indexOf(separator) + 1));
			}
			catch (NumberFormatException e) {
				// only the first occurrence of the option is considered
				return null;
			}
			System.out.println(parsed);
			return parsed;
		}
		return null;
	}	
	/**
	 * Reads the first lostCutoff=... argument as a fraction and echoes the
	 * accepted value to standard output.
	 * <p>
	 * The cutoff is compared against a fraction of covered exon positions, and
	 * the caller reports a null result as "not a value [0,1]", so values
	 * outside that range (including NaN) are rejected here rather than being
	 * silently used.
	 *
	 * @param cmd command line arguments
	 * @return the cutoff in [0,1], or null when the option is absent, not a
	 *         number, or out of range
	 */
	public static Double getLostCutoff(String[] cmd) {
		final String prefix = lostCutoffKey + separator;
		for (String c : cmd) {
			if (!c.startsWith(prefix)) {
				continue;
			}
			double lCutoff;
			try {
				lCutoff = Double.parseDouble(c.substring(c.indexOf(separator) + 1));
			}
			catch (NumberFormatException e) {
				return null;
			}
			// the negated form also rejects NaN, which fails every comparison
			if (!(lCutoff >= 0.0 && lCutoff <= 1.0)) {
				return null;
			}
			System.out.println(lCutoff);
			return lCutoff;
		}
		return null;
	}	
	/**
	 * Extracts the value of the first outDirPath=... argument and echoes it
	 * to standard output.
	 *
	 * @param cmd command line arguments
	 * @return the text after the first separator, or null when the option is absent
	 */
	public static String getOutDirPath(String[] cmd) {
		final String prefix = outPathKey + separator;
		for (int idx = 0; idx < cmd.length; idx++) {
			String arg = cmd[idx];
			if (!arg.startsWith(prefix)) {
				continue;
			}
			String value = arg.substring(arg.indexOf(separator) + 1);
			System.out.println(value);
			return value;
		}
		return null;
	}
	/**
	 * Extracts the value of the first gffFile=... argument and echoes it to
	 * standard output.
	 *
	 * @param cmd command line arguments
	 * @return the text after the first separator, or null when the option is absent
	 */
	public static String getGffFile(String[] cmd) {
		final String prefix = gffKey + separator;
		for (int idx = 0; idx < cmd.length; idx++) {
			String arg = cmd[idx];
			if (!arg.startsWith(prefix)) {
				continue;
			}
			String value = arg.substring(arg.indexOf(separator) + 1);
			System.out.println(value);
			return value;
		}
		return null;
	}
	/**
	 * Command line entry point.
	 * <p>
	 * Parses the key=value options, loads genes from the gff3 file, computes
	 * per-position coverage for every requested chromosome from the BAM
	 * file(s), and writes into the output directory:
	 * <ul>
	 * <li>one <code>&lt;chromosome&gt;.excov</code> CSV per chromosome that has genes,</li>
	 * <li><code>stats.txt</code> with the run parameters and summary statistics,</li>
	 * <li><code>graph.csv</code> listing the genes classified as lost,</li>
	 * <li><code>chrs.csv</code> with the chromosome rows collected by getCoverage.</li>
	 * </ul>
	 * Prints the usage text and exits with status 1 when "help" is requested,
	 * fewer than four arguments are given, or a required option is missing.
	 *
	 * @param args command line arguments, see the usage text
	 */
	public static void main(String[] args) {
		if (getHelp(args) != null || args.length < 4) {
			System.out.println(help);
			System.exit(1);
		}
		String bamFilesPath = getBamPath(args);
		if (bamFilesPath == null) {
			exitWithUsage("Path to bam files not specified.");
		}
		String bamFilesS = getBamFileList(args);
		if (bamFilesS == null) {
			exitWithUsage("List of .bam files not specified.");
		}
		String[] bamFiles = bamFilesS.split(",");
		if (bamFiles.length == 0) {
			exitWithUsage("List of .bam files not specified.");
		}
		Integer minCov = getMinCov(args);
		if (minCov == null) {
			System.out.println("Minimum coverage not specified or not an integer.");
			minCov = 1;
			System.out.println("Using default value: " + minCov);
		}
		Double lostCT = getLostCutoff(args);
		if (lostCT == null) {
			System.out.println("Lost gene cufoff not specified or not a value [0,1].");
			lostCT = 0.0;
			System.out.println("Using default value: " + lostCT);
		}
		String gffPath = getGffFile(args);
		if (gffPath == null) {
			exitWithUsage("Gff3 file not specified.");
		}
		String outPath = getOutDirPath(args);
		if (outPath == null) {
			exitWithUsage("Path to the output folder not specified.");
		}
		// The option is optional: test for null BEFORE trimming, otherwise an
		// omitted chromosomeList ends in a NullPointerException.
		String chromList = getChromList(args);
		if (chromList == null) {
			System.out.println("Chromosome list not specified.");
			chromList = "all";
			System.out.println("Using default value: " + chromList);
		}
		chromList = chromList.trim();
		String[] chrs = chromList.split(",");
		if (chrs.length == 0) {
			exitWithUsage("Chromosome list empty.");
		}
		List<Integer> covCatList = parseCovCategories(getCovCat(args));
		boolean outFmtE = getOutFmt(args);

		int noGenesAll = 0;
		int noGenesPresent = 0;
		int noGenesLost = 0;

		// length totals are long: summed gene spans of a large annotation can
		// exceed the int range
		long totLenAll = 0;
		long totLenPresent = 0;
		long totLenLost = 0;

		long totELenAll = 0;
		long totELenPresent = 0;
		long totELenLost = 0;

		long totENumAll = 0;
		long totENumPresent = 0;
		long totENumLost = 0;

		List<String> lostList = new ArrayList<String>();
		List<String> chrInfList = new ArrayList<String>();

		// part I - get genes from gff3 file
		List<GeneEntry> geneList = null;
		try {
			geneList = ExCovUtils.getAllGenesGff3(gffPath, false);
		}
		catch (Exception e) {
			// nothing useful can be computed without the annotation
			e.printStackTrace();
			System.out.println("Could not read genes from gff3 file: " + gffPath);
			System.exit(1);
		}
		Map<String, List<GeneEntry>> genesInChrs = ExCovUtils
				.genesListToMap(geneList);

		// part II - resolve "all" into the sequence names listed in the header
		// of the first bam file
		if (chrs[0].equalsIgnoreCase("all")) {
			SAMFileReader samR = null;
			try {
				samR = getBamFileReader(joinPath(bamFilesPath.trim(), bamFiles[0].trim()));
			}
			catch (Exception e) {
				e.printStackTrace();
				System.exit(1);
			}
			try {
				SAMFileHeader headS = samR.getFileHeader();
				SAMSequenceDictionary dictS = headS.getSequenceDictionary();
				List<SAMSequenceRecord> seqsR = dictS.getSequences();
				String[] chrArr = new String[seqsR.size()];
				int cnt = 0;
				for (SAMSequenceRecord s : seqsR) {
					chrArr[cnt++] = s.getSequenceName();
				}
				chrs = chrArr;
			}
			finally {
				samR.close();
			}
		}

		// part III - calculate coverage and annotate the genes with it
		for (String s : chrs) {
			List<GeneEntry> genes = genesInChrs.get(s);
			if (genes == null) {
				System.out.println("Chromosome: " + s + " has no genes.");
				continue;
			}
			List<Integer> finalCoverage = getCoverage(bamFiles, bamFilesPath, s, chrInfList);
			ExCovUtils.checkGenesUseExons(finalCoverage, genes, minCov);
		}

		// part IV - print per chromosome results to files
		for (String ch : chrs) {
			List<GeneEntry> genes = genesInChrs.get(ch);
			if (genes == null) {
				continue;
			}
			PrintWriter print = null;
			try {
				print = new PrintWriter(new FileOutputStream(
						joinPath(outPath, ch + ".excov"), false));
				String header = "chromosome,ID,is_lost,start_position,end_postion,frac_exons_covered,frac_gene_covered,ave_cov_depth_exons,cov_cat,ave_cove_depth_gene";
				if (outFmtE) {
					// extended rows carry the exon count right after the gene
					// coverage depth, so the header needs that column as well
					header = header
							+ ",number_of_exons,all_exons_ids,all_exons_length,all_exons_frac_covered,all_exons_cov_depth,notes";
				}
				print.println(header);
				for (GeneEntry gene : genes) {
					List<ExonEntry> exons = gene.getExons();
					int totELen = exonsTotLen(exons);
					int totGLen = gene.getEnd() - gene.getStart() + 1;
					// a gene is lost when the covered fraction of its exons
					// does not exceed the cutoff
					boolean isLost = gene.getFracExonsCov() <= lostCT;

					noGenesAll++;
					totLenAll += totGLen;
					totELenAll += totELen;
					totENumAll += exons.size();
					if (isLost) {
						noGenesLost++;
						totLenLost += totGLen;
						totELenLost += totELen;
						totENumLost += exons.size();
						lostList.add(ch + "," + gene.getID() + ","
								+ gene.getStart() + "," + gene.getEnd());
					}
					else {
						noGenesPresent++;
						totLenPresent += totGLen;
						totELenPresent += totELen;
						totENumPresent += exons.size();
					}

					String lost = isLost ? "LOST" : "PRESENT";
					String covClass = coverageClass(gene.getAveExonsCov(), covCatList);
					String row = ch + "," + gene.getID() + "," + lost + ","
							+ gene.getStart() + "," + gene.getEnd() + ","
							+ gene.getFracExonsCov() + "," + gene.getCovFrac() + ","
							+ gene.getAveExonsCov() + "," + covClass + ","
							+ gene.getAveGeneCov();
					if (outFmtE) {
						row = row + "," + gene.getNumberOfExons() + ","
								+ exonsIDsToString(exons) + ","
								+ exonsLengthsToString(exons) + ","
								+ exonsCovHorToString(exons) + ","
								+ exonsCovVerToString(exons) + ","
								+ gene.getNote();
					}
					print.println(row);
				}
			}
			catch (Exception e) {
				e.printStackTrace();
			}
			finally {
				if (print != null) {
					print.close();
				}
			}
		}

		// Averages are floating point divisions, so an empty group prints NaN
		// instead of failing.
		double aveLenAll = (double) totLenAll / (double) noGenesAll;
		double aveLenPresent = (double) totLenPresent / (double) noGenesPresent;
		double aveLenLost = (double) totLenLost / (double) noGenesLost;

		double aveELenAll = (double) totELenAll / (double) totENumAll;
		double aveELenPresent = (double) totELenPresent / (double) totENumPresent;
		double aveELenLost = (double) totELenLost / (double) totENumLost;

		double aveENumAll = (double) totENumAll / (double) noGenesAll;
		double aveENumPresent = (double) totENumPresent / (double) noGenesPresent;
		double aveENumLost = (double) totENumLost / (double) noGenesLost;

		PrintWriter printS = null;
		try {
			printS = new PrintWriter(new FileOutputStream(joinPath(outPath, "stats.txt"), false));
			printS.println("Bam path: " + bamFilesPath);
			printS.println("Bam files: " + bamFilesS);
			printS.println("Gff path: " + gffPath);
			printS.println("Chromosome list: " + chromList);
			printS.println("Minimum cov: " + minCov);
			printS.println("Gene loss cutoff: " + lostCT);
			// report the categories actually used, not just the defaults
			printS.println("Coverage categories: " + joinInts(covCatList));
			printS.println();
			printS.println("Total number of genes: " + noGenesAll);
			printS.println("Total number of genes lost: " + noGenesLost);
			printS.println("Average gene length: " + aveLenAll);
			printS.println("Average gene length, genes not lost: " + aveLenPresent);
			printS.println("Average gene length, genes lost: " + aveLenLost);
			printS.println("Average exon length: " + aveELenAll);
			printS.println("Average exon length, genes not lost: " + aveELenPresent);
			printS.println("Average exon length, genes lost: " + aveELenLost);
			printS.println("Average exon number: " + aveENumAll);
			printS.println("Average exon number, genes not lost: " + aveENumPresent);
			printS.println("Average exon number, genes lost: " + aveENumLost);
		}
		catch (Exception es) {
			es.printStackTrace();
		}
		finally {
			if (printS != null) {
				printS.close();
			}
		}

		PrintWriter printL = null;
		PrintWriter printCh = null;
		try {
			printL = new PrintWriter(new FileOutputStream(joinPath(outPath, "graph.csv"), false));
			printCh = new PrintWriter(new FileOutputStream(joinPath(outPath, "chrs.csv"), false));
			printL.println("chr,id,start,end");
			printCh.println("chr,start,end,len");
			for (String s : lostList) {
				printL.println(s);
			}
			for (String s : chrInfList) {
				printCh.println(s);
			}
		}
		catch (Exception es) {
			es.printStackTrace();
		}
		finally {
			if (printL != null) {
				printL.close();
			}
			if (printCh != null) {
				printCh.close();
			}
		}
	}

	/** Prints the message followed by the usage text and exits with status 1. */
	private static void exitWithUsage(String message) {
		System.out.println(message);
		System.out.println(help);
		System.exit(1);
	}

	/**
	 * Appends a file name to a directory path, inserting a path separator only
	 * when the directory is non-empty and does not already end with / or \.
	 */
	private static String joinPath(String dir, String name) {
		if (dir.length() == 0 || dir.endsWith("/") || dir.endsWith("\\")) {
			return dir + name;
		}
		return dir + File.separator + name;
	}

	/**
	 * Parses the comma separated coverage categories into an ascending list.
	 * Entries that are not integers are reported and skipped; when fewer than
	 * two valid categories remain the defaults 0,10,20,40,70 are used.
	 */
	private static List<Integer> parseCovCategories(String covCatS) {
		List<Integer> cats = new ArrayList<Integer>();
		if (covCatS != null) {
			for (String s : covCatS.split(",")) {
				try {
					cats.add(Integer.parseInt(s.trim()));
				}
				catch (NumberFormatException e) {
					System.out.println("One of the categories not an integer: " + s);
				}
			}
		}
		if (cats.size() < 2) {
			System.out.println("List of coverage categories list not specified, or samller than two.");
			cats.clear();
			cats.add(0);
			cats.add(10);
			cats.add(20);
			cats.add(40);
			cats.add(70);
			System.out.println("Using default value: " + joinInts(cats));
		}
		// the class lookup assumes ascending boundaries
		Collections.sort(cats);
		return cats;
	}

	/** Joins the values with commas, e.g. 0,10,20,40,70. */
	private static String joinInts(List<Integer> values) {
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < values.size(); i++) {
			if (i > 0) {
				sb.append(',');
			}
			sb.append(values.get(i));
		}
		return sb.toString();
	}

	/**
	 * Maps an average coverage onto a category label: "not_covered" (or
	 * "&lt;=first" when the first boundary is not 0) at or below the first
	 * boundary, "last+" above the last one, otherwise "lo-hi" for the pair of
	 * neighbouring boundaries with lo &lt; cov &lt;= hi. Returns an empty
	 * string when no class matches (e.g. for NaN).
	 */
	private static String coverageClass(double cov, List<Integer> cats) {
		int first = cats.get(0);
		int last = cats.get(cats.size() - 1);
		if (cov <= first) {
			return first == 0 ? "not_covered" : "<=" + first;
		}
		if (cov > last) {
			return last + "+";
		}
		for (int i = 0; i + 1 < cats.size(); i++) {
			int lo = cats.get(i);
			int hi = cats.get(i + 1);
			if (cov > lo && cov <= hi) {
				return lo + "-" + hi;
			}
		}
		return "";
	}
	/**
	 * Opens a BAM file together with its index, which is expected next to the
	 * BAM file under the same name with ".bai" appended. Validation stringency
	 * is set to SILENT.
	 *
	 * @param bamFilePath path to the .bam file
	 * @return an open reader; the caller is responsible for closing it
	 * @throws Exception when the index file does not exist or the reader
	 *             reports that it has no index
	 */
	public static SAMFileReader getBamFileReader(String bamFilePath)
			throws Exception {
		File bamFile = new File(bamFilePath);
		File baiFile = new File(bamFile.getAbsolutePath() + ".bai");
		// fail early with the offending path instead of a later, less
		// specific error from the reader
		if (!baiFile.exists()) {
			throw new Exception("Missing index file: " + baiFile.getPath());
		}
		SAMFileReader sam = new SAMFileReader(bamFile, baiFile, true);
		sam.setValidationStringency(ValidationStringency.SILENT);
		if (!sam.hasIndex()) {
			// do not leak the open reader on the failure path
			sam.close();
			throw new Exception("Missing index file: " + baiFile.getPath());
		}
		return sam;
	}
	/**
	 * Joins the lengths (end - start + 1) of the exons with ';'.
	 *
	 * @param exs exons of one gene
	 * @return e.g. "120;85;300", or an empty string for an empty list
	 */
	public static String exonsLengthsToString(List<ExonEntry> exs) {
		StringBuilder sb = new StringBuilder();
		boolean first = true;
		for (ExonEntry e : exs) {
			if (!first) {
				sb.append(';');
			}
			first = false;
			sb.append(e.getEnd() - e.getStart() + 1);
		}
		return sb.toString();
	}
	/**
	 * Joins the IDs of the exons with ';'.
	 *
	 * @param exs exons of one gene
	 * @return the IDs in list order separated by ';', or an empty string for
	 *         an empty list
	 */
	public static String exonsIDsToString(List<ExonEntry> exs) {
		StringBuilder sb = new StringBuilder();
		boolean first = true;
		for (ExonEntry e : exs) {
			if (!first) {
				sb.append(';');
			}
			first = false;
			String id = e.getID();
			sb.append(id);
		}
		return sb.toString();
	}
	/**
	 * Joins the covered fraction (getFracCov) of each exon with ';'.
	 *
	 * @param exs exons of one gene
	 * @return the fractions in list order separated by ';', or an empty
	 *         string for an empty list
	 */
	public static String exonsCovHorToString(List<ExonEntry> exs) {
		StringBuilder sb = new StringBuilder();
		boolean first = true;
		for (ExonEntry e : exs) {
			if (!first) {
				sb.append(';');
			}
			first = false;
			double fracCov = e.getFracCov();
			sb.append(fracCov);
		}
		return sb.toString();
	}
	/**
	 * Joins the average coverage depth (getAveCov) of each exon with ';'.
	 *
	 * @param exs exons of one gene
	 * @return the depths in list order separated by ';', or an empty string
	 *         for an empty list
	 */
	public static String exonsCovVerToString(List<ExonEntry> exs) {
		StringBuilder sb = new StringBuilder();
		boolean first = true;
		for (ExonEntry e : exs) {
			if (!first) {
				sb.append(';');
			}
			first = false;
			double aveCov = e.getAveCov();
			sb.append(aveCov);
		}
		return sb.toString();
	}
	/**
	 * Computes the mean exon length, each length being end - start + 1.
	 *
	 * @param exs exons of one gene
	 * @return summed length divided by the number of exons; NaN for an empty
	 *         list because the division is done in floating point
	 */
	public static double exonsAveLen(List<ExonEntry> exs) {
		int summed = 0;
		for (ExonEntry exon : exs) {
			summed += exon.getEnd() - exon.getStart() + 1;
		}
		double count = exs.size();
		return summed / count;
	}
	/**
	 * Sums the lengths of all exons, each length being end - start + 1.
	 *
	 * @param exs exons of one gene
	 * @return total exon length, 0 for an empty list
	 */
	public static int exonsTotLen(List<ExonEntry> exs) {
		int summed = 0;
		for (ExonEntry exon : exs) {
			summed += exon.getEnd() - exon.getStart() + 1;
		}
		return summed;
	}
	/**
	 * Computes the per-position read depth of one chromosome, summed over all
	 * given BAM files.
	 * <p>
	 * Each BAM file is opened through getBamFileReader and walked with a
	 * SamLocusIterator over the interval 1..sequenceLength (taken from that
	 * file's header) with uncovered loci emitted and no SAM filters. Index i
	 * of the result holds the depth of the (i+1)-th emitted position. One row
	 * "name,1,length,length" describing the chromosome is appended to
	 * chromInfo per call. The process exits with status 1 when a BAM file
	 * cannot be opened.
	 *
	 * @param bamFiles BAM file names, relative to bamFilesPath
	 * @param bamFilesPath directory holding the BAM files
	 * @param chromosome sequence name as it appears in the BAM header
	 * @param chromInfo list receiving the chromosome description row
	 * @return summed coverage per position, or null when bamFiles is empty
	 * @throws IllegalArgumentException when the chromosome is not listed in
	 *             the header of one of the BAM files
	 */
	public static List<Integer> getCoverage(String[] bamFiles,
			String bamFilesPath, String chromosome, List<String> chromInfo) {
		List<Integer> finalCoverage = null;
		// only one row per chromosome, however many BAM files are merged
		boolean chromInfoRecorded = false;
		// tolerate a directory given without the trailing / or \
		String dir = bamFilesPath.trim();
		if (dir.length() > 0 && !dir.endsWith("/") && !dir.endsWith("\\")) {
			dir = dir + File.separator;
		}
		for (String bam : bamFiles) {
			SAMFileReader samR = null;
			try {
				samR = getBamFileReader(dir + bam.trim());
			}
			catch (Exception e) {
				e.printStackTrace();
				System.exit(1);
			}
			SamLocusIterator lItr = null;
			try {
				SAMFileHeader headS = samR.getFileHeader();
				SAMSequenceRecord seqRec = headS.getSequence(chromosome);
				if (seqRec == null) {
					throw new IllegalArgumentException("Chromosome " + chromosome
							+ " is not listed in the header of " + bam.trim());
				}
				String seqName = seqRec.getSequenceName();
				int seqLen = seqRec.getSequenceLength();

				List<Integer> coverage = new ArrayList<Integer>();
				IntervalList seqIL = new IntervalList(headS);
				// used to have interval from 0, now from 1, no difference in
				// results; picard indexing from 1
				seqIL.add(new Interval(chromosome, 1, seqLen));
				lItr = new SamLocusIterator(samR, seqIL, true);
				System.out.println("I'm chromosome: " + seqName + " and I'm "
						+ seqLen + " bp long.");
				if (!chromInfoRecorded) {
					chromInfo.add(seqName + "," + "1" + "," + seqLen + "," + seqLen);
					chromInfoRecorded = true;
				}
				lItr.setEmitUncoveredLoci(true);
				lItr.setSamFilters(null);
				int lastPosition = 0;
				for (SamLocusIterator.LocusInfo inf : lItr) {
					int pos = inf.getPosition();
					// exactly one position was skipped by the iterator: fill
					// it with zero coverage to keep list index and position
					// aligned
					if (pos == (lastPosition + 2)) {
						coverage.add(0);
					}
					coverage.add(inf.getRecordAndPositions().size());
					lastPosition = pos;
				}
				// remedy for the situation where there is a single 0 following
				// covered or uncovered position at the end, normally would not
				// get emitted
				// used to be a problem, not sure if it still is
				// it was a known bug fixed as of v1.88
				if (lastPosition == seqLen - 1) {
					coverage.add(0);
				}
				if (coverage.size() != seqLen) {
					System.out
							.println("WARNING: coverege array size not equals what reported in the header");
				}
				if (bamFiles.length == 1) {
					finalCoverage = coverage;
				}
				else {
					if (finalCoverage == null) {
						finalCoverage = new ArrayList<Integer>(seqLen);
						for (int i = 0; i < seqLen; i++) {
							finalCoverage.add(0);
						}
					}
					for (int i = 0; i < coverage.size(); i++) {
						if (i < finalCoverage.size()) {
							finalCoverage.set(i, finalCoverage.get(i) + coverage.get(i));
						}
						else {
							// this file produced more positions than the
							// accumulated list holds: extend instead of
							// failing with an index error
							finalCoverage.add(coverage.get(i));
						}
					}
				}
			}
			finally {
				if (lItr != null) {
					lItr.close();
				}
				samR.close();
			}
		}
		return finalCoverage;
	}
}
