...
 
Commits (2)
......@@ -10,7 +10,7 @@
# ARG4=threadcount (optional), an integer that indicates how many threads to be used
# example to run
# dos2unix /home/150/as7425/RNA-Pluto/core-ExoDUs/characterise-ExoDUs.sh &>/dev/null && bash /home/150/as7425/RNA-Pluto/core-ExoDUs/characterise-ExoDUs.sh "/scratch/lf10/as7425/ExoDUs_index_drosophila" "/scratch/lf10/as7425/totalData.bam" "yeast" "/scratch/lf10/as7425/" "48"
# dos2unix /home/150/as7425/ClaiRO/core-ExoDUs/characterise-ExoDUs.sh &>/dev/null && bash /home/150/as7425/ClaiRO/core-ExoDUs/characterise-ExoDUs.sh "/scratch/lf10/as7425/ExoDUs_index_drosophila" "/scratch/lf10/as7425/totalData.bam" "/scratch/lf10/as7425/" "48"
####################################################################################################
......@@ -29,7 +29,7 @@ ssec() { printf "$(date +%F)\t$(date +%T)\t\t> $*\n"; }; export -f ssec
# recover temporary reads function
export myOutput="${3}"
recoverTemp () {
export temp="${myOutput}/temp"; mkdir ${temp} 2>/dev/null; [ -d ${temp} ] || echo "cannot access temp directory" && exit 1
export temp="${myOutput}/temp"; mkdir -p ${temp} 2>/dev/null; [ -d ${temp} ] || echo "cannot access temp directory" && exit 1
cp -r /dev/shm/* ${temp} && echo "brought the reads home" || echo "cannot bring home the reads" && exit 1
exit 1
}; export -f recoverTemp
......@@ -58,7 +58,11 @@ else export threadCount="48" && ssec "defaulting to threadcount = 48"; fi
# check that modules, including picard, are available
module load samtools bedtools parallel java || die "cannot load one or more modules" # load module in a Gadi-specific manner
[ -n "${SCRIPTPATH+set}" ] || SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" || die "cannot get script path"; export SCRIPTPATH="${SCRIPTPATH}" # get the script path # taken from https://stackoverflow.com/questions/4774054/reliable-way-for-a-bash-script-to-get-the-full-path-to-itself
cd ${SCRIPTPATH}/../../internal-ClaiRO/dependencies; export SCRIPTPATH=$(pwd)
echo "scriptpath is ${SCRIPTPATH}"
# find featureCounts
[ -f ${SCRIPTPATH}/../dependencies/subread-2.0.0-Linux-x86_64/bin/featureCounts ] || die "cannot find featureCounts"; export featureCounts="${SCRIPTPATH}/../dependencies/subread-2.0.0-Linux-x86_64/bin/featureCounts"
# find picard
......
......@@ -177,6 +177,7 @@ export od=${outputDirectory}
export matureSamMerge="${od}/matureSamMerge"
export wptIdx="${od}/whippetIndex"
export fqd="${od}/whippetFASTQ";
export wm=${od}/whippetMature;
# validate threadCount
ssec "proceeding with ${threadCount} threads"
......@@ -217,13 +218,20 @@ main() {
makeWhippetIndex || die "unable to prepare whippet index"
fi
# generate whippet index
# generate fastq
if [ "$nm" -lt "4" ]
then
echo "Preparing to analyse cytoplasmic alignments"
generateSequence || die "unable to prepare whippet index"
fi
# call whippet for mature transcripts
if [ "$nm" -lt "5" ]
then
echo "Preparing to analyse cytoplasmic alignments"
callWhippet || die "unable to prepare whippet index"
fi
die "no further instructions provided"
}
......@@ -240,8 +248,8 @@ function prepareForWhippetIndex() {
if [ -f "${matureSamMerge}/unique.sorted.merged.bam" ]; then
# check that we have records in the sam file
samRecords=$(samtools view ${matureSamMerge}/unique.sorted.merged.bam | head -n 10 | wc -l)
[ ${samRecords} -eq "0" ] && die "no valid records found in your alignment at ${matureSamMerge}/unique.sorted.merged.bam"
#samRecords=$(samtools view ${matureSamMerge}/unique.sorted.merged.bam | head -n 10 | wc -l)
#[ ${samRecords} -eq "0" ] && die "no valid records found in your alignment at ${matureSamMerge}/unique.sorted.merged.bam"
# record file modification date
mergedFileModDate=$(stat ${matureSamMerge}/unique.sorted.merged.bam | grep "Modify" | tr " " "\t" | cut -f2,3 | cut -f1 -d".")
......@@ -326,6 +334,17 @@ vsec "annotation is ${myAnnotation}"
# prepare a whippet index using novel splice junctions from cytoplasmic alignments
function makeWhippetIndex() {
whippetStatus=0
if [ -f ${od}/whippetIndex/index_mature.jls ]; then
# check that the whippet index doesn't already exist
mergedWhippetFileModDate=$(stat ${od}/whippetIndex/index_mature.jls | grep "Modify" | tr " " "\t" | cut -f2,3 | cut -f1 -d".")
# tell the user and return from the module
ssec "Output for already exists and was last modified at ${mergedWhippetFileModDate} --- Returning"
whippetStatus=1
fi
[ ${whippetStatus} -eq "1" ] && return 0
# ask whippet to make an index while recognizing splice junctions from our bam
# check we have a bamfile
[ -f "${matureSamMerge}/unique.sorted.merged.bam" ] || die "could not find cytoplasmicBam for whippet index"
......@@ -335,7 +354,7 @@ function makeWhippetIndex() {
cd ${wptIdx} || die "cannot access whippet index directory"
rm -rf ${wptIdx}/* # clear the index
ssec "launching whippet"
julia ${whippetPath}/whippet-index.jl --fasta "${myFasta}" --bam "${matureSamMerge}/unique.sorted.merged.bam" --gtf "${myAnnotation}" --bam-min-reads 3 --index "${od}/whippetIndex/index_" || die "canot make whippet index"
julia ${whippetPath}/whippet-index.jl --fasta "${myFasta}" --bam "${matureSamMerge}/unique.sorted.merged.bam" --gtf "${myAnnotation}" --bam-min-reads 3 --index "${od}/whippetIndex/index_mature" || die "canot make whippet index"
ssec "succesfully build whippet index"
}
......@@ -357,12 +376,20 @@ function generateSequence() {
bamNameTemp=${bam##*/} # trim bam path to filename only
bamName=${bam%./} # remove .bam extension
seqStatus=0
if [ -f "${fqd}/${bamName}.fastq" ]; then
ssec "found ${fqd}/${bamName}.fastq"
seqStatus=1
fi
setStatus=1 && return 0
# sort the file by name so fastq reads are paired
samtools sort -l 0 -m 4G -@ 4 -n ${bam} > ${fqd}/${bamName}.bam
# use samtools to extract a fastq fron the sorted bam file
samtools fastq -@ 4 ${fqd}/${bamName}.bam > ${fqd}/${bamName}.fastq && rm ${fqd}/${bamName}.bam || die "cannot generate fastq for ${bamName}"
}
}; export -f fetchFastq
# call function fetchFastq for each cytoplasmic alignment in our cyto folder;
cd ${cbamDir} && ls *bam | parallel -j "${threadCount}" fetchFastq {} || die "cannot write fastqs for each ..."
......@@ -381,13 +408,25 @@ function generateSequence() {
function callWhippet() {
## compute whippet over each fastq
# generate a list of fastqs
targfqs=`ls -d ${fqd}/*.fastq`
mkdir ${wm} 2>/dev/null
# for each fastq, call whippet
# now, seperate the data by strand
cd /dev/shm/exonsByGene/ && ls | parallel -j "${qtc}" strandSeparateExons {} || die "cannot split your exons by strand"
function whippetMature() {
# first, check if the output already exists
# next, validate the fastq
seq=${fqd}/${1}
seqCount=$(cat $seq | wc -l)
[ ${seqCount} -eq "0" ] && die "${seq} has length 0"
# finally, call whippet
julia ${whippetPath}/whippet-quant.jl -o ${wm}/${1%.*}_ -x ${od}/whippetIndex/index_mature.jls ${seq} || die "whippet failed specifically for ${seq}"
}; export -f whippetMature
cd ${fqd} && ls *.fastq | parallel -j "${qtc}" whippetMature {} || die "cannot split your exons by strand"
ssec "assigned strand to all your exons"
}
......