#!/usr/bin/Rscript --vanilla

# Copyright (c) 2011 Expression Analysis / Gunjan Hariani, Erik Aronesty
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# 
# $Id: fastx-graph 525 2012-12-25 19:41:22Z earonesty $
 
if (!require(getopt)) {
    write(c("Installing package on:",system("hostname",intern=T)),file=stderr())
    install.packages('getopt',repos='http://R-Forge.R-project.org')
    # will die if it fails at this point
    library(getopt)
}

if (!require("Hmisc")) {
    write(c("Installing package on:",system("hostname",intern=T)),file=stderr())
    install.packages('Hmisc',repos='http://R-Forge.R-project.org')
    # will die if it fails at this point
    library("Hmisc")
}

spec <- matrix(c(
        'input'  , 'i', 1, "character", "file from fastq-stats -x (required)",
        'gc'     , 'G', 1, "character", "input gc content file (optional)",
        'out'    , 'o', 1, "character", "output filename (optional)",
        'help'   , 'h', 0, "logical",   "this help"
),ncol=5,byrow=T)

opt = getopt(spec);

if (!is.null(opt$help) || is.null(opt$input)) {
    cat(paste(getopt(spec, usage=T, command="fastx-graph"),"\n"));
    q();
}

in.file <- opt$input
gc.file <- opt$gc
out.file <- opt$out

fastx <- read.table(in.file,sep="\t",header=T,as.is=T)

# output is in, minus txt, plus png
if (is.null(out.file)) {
    in.file <- gsub(".txt$","",in.file,perl=T)
    out.file <- paste(in.file,".png",sep="")
}

# correct for bug in R if the file has a % sign in it
out.file <- gsub("%","%%",out.file)

png(out.file,width=1000, height=500)

par(mar=c(4,3,5,5),xaxs="i",yaxs="i",xpd=T)
plot(c(0,0),pch="",ylim=c(min(c(0,fastx$lW)),max(41,fastx$rW)),xlim=c(0,(nrow(fastx)+1)),
	xlab="",ylab="",las=2,cex.axis=.85,cex.lab=.85,xaxt="n")

lims <- par("xaxp")[1:2] 
major.ticks <- pretty(lims,n=par("xaxp")[3]) 
minor.tick(nx=(major.ticks[2]-major.ticks[1]),ny=10)

minor.ticks <- 1:nrow(fastx)
mtext("Cycle",side=3,line=3,cex=.85)
axis(1,at=major.ticks,cex.axis=.85,labels=F)
axis(3,at=major.ticks,cex.axis=.85,labels=F)
axis(3,at=minor.ticks,tcl=par("tcl")*0.5,labels=minor.ticks,cex.axis=.80,las=3) 

colnames(fastx) <- c("column","count","min","max","sum","mean","Q1","med",
			"Q3","IQR","lW","rW","A_count","C_count","G_count",
			"T_count","N_count","Max_count")

for(i in 1:nrow(fastx)) {
	par(new=T)
	rect(fastx$column[i]-.35,fastx$Q1[i],fastx$column[i]+0.35,fastx$Q3[i],col="gray")
	segments(fastx$column[i]-.35,fastx$med[i],fastx$column[i]+0.35,fastx$med[i],col="red",lwd=1.5)
	segments(fastx$column[i],fastx$lW[i],fastx$column[i],fastx$Q1[i],lty="dashed")
	segments(fastx$column[i],fastx$Q3[i],fastx$column[i],fastx$rW[i],lty="dashed")
	segments(fastx$column[i]-.35,fastx$lW[i],fastx$column[i]+0.35,fastx$lW[i])
	segments(fastx$column[i]-.35,fastx$rW[i],fastx$column[i]+0.35,fastx$rW[i])
}

# if theres a significant difference
tots<-fastx$N_count+fastx$T_count+fastx$G_count+fastx$C_count+fastx$A_count

if (min(tots) < (max(tots)*.98)) {
    par(new=T)
    plot(tots,col="gray",xaxt="n",yaxt="n",xlab="",ylab="",pch="+",ylim=c(min(tots),max(tots)))
    lines(fastx$N_count+fastx$T_count+fastx$G_count+fastx$C_count+fastx$A_count,col="gray",xaxt="n",yaxt="n",xlab="",ylab="", lty=2)
}

par(new=T)
plot(fastx$column,fastx$A_count*100/fastx$count,col="red",type="l",xaxt="n",yaxt="n",xlab="",ylab="",
			ylim=c(0,100),lwd=2)
par(new=T)
plot(fastx$column,fastx$C_count*100/fastx$count,col="blue",type="l",xaxt="n",yaxt="n",xlab="",ylab="",
			ylim=c(0,100),lwd=2)
par(new=T)
plot(fastx$column,fastx$G_count*100/fastx$count,col="green",type="l",xaxt="n",yaxt="n",xlab="",ylab="",
			ylim=c(0,100),lwd=2)
par(new=T)
plot(fastx$column,fastx$T_count*100/fastx$count,col="black",type="l",xaxt="n",yaxt="n",xlab="",ylab="",
			ylim=c(0,100),lwd=2)
par(new=T)
barplot(fastx$N_count*100/fastx$count,col="orange",xaxt="n",yaxt="n",xlab="",ylab="",
			ylim=c(0,100))

axis(4,at=seq(0,100,25),cex.axis=.85)
mtext("Pct Base Distribution",side=4,line=3,cex=.85)
mtext("Base Quality",side=2,line=2,cex=.85)
legend(0,-4,col=c("red","blue","green","black","orange"),lty=1,
	legend=c("A","C","G","T","N"),cex=.70,horiz=T,lwd=2) 

if(!is.null(gc.file)) {
	tmp <- read.table(gc.file,sep="\t",header=F,as.is=T)
    if (tmp[1,1] == 'pct-GC') {
        # silly legacy format
	    GC <- read.table(gc.file,sep="\t",header=T,as.is=T,skip=3)
    } else if (tmp[1,1] == 'pct_GC') {
	    GC <- read.table(gc.file,sep="\t",header=T,as.is=T,skip=1)
    } else {
	    GC <- read.table(gc.file,sep="\t",header=T,as.is=T)
    }
    colnames(GC)=c("pct_gc", "count")
	par(new=T)
	par(fig=c(0.1,0.2,0.45,0.60))
	par(mar=c(0,0,1,0))
	plot(GC$pct_gc,GC$count,type="l",xaxt="n",yaxt="n",
			main="%GC per read",cex.main=.90)
	axis(1,seq(0,100,20),labels=seq(0,100,20),las=2,tck=-0.1,cex.axis=.75)
}


graphics.off()
