Commit 2d0a8cb1 authored by A.J. Sethi

Updated index script to generate whippet index

parent bf74b238
......@@ -3,7 +3,7 @@
# written by A.J. Sethi on 2020-10-18
# testing;
# module load clairo; bash ~/ClaiRO/core-w/2020-10-18_makeIndex.sh -a "/g/data/lf10/as7425/SRscan/genome_dmel-r6.35" -w "/home/150/as7425/.julia/v0.6/Whippet/bin/" -n "/scratch/lf10/as7425/nascent_bam" -c "/scratch/lf10/as7425/cytoplasmic_bam" -o "/scratch/lf10/as7425/clairo_out"
# module load clairo; bash ~/ClaiRO/core-w/2020-10-18_makeIndex.sh -a "/g/data/lf10/as7425/genomes/human_genome" -w "/home/150/as7425/.julia/v0.6/Whippet/bin/" -n "/g/data/lf10/as7425/2020-10-20_castilloGuzman_analysis/clairoInput/nascentAlignments" -c "/g/data/lf10/as7425/2020-10-20_castilloGuzman_analysis/clairoInput/cytoplasmicAlignments" -o "/g/data/lf10/as7425/2020-10-20_castilloGuzman_analysis/clairoOutput" -t "48" -v "TRUE"
# submit with;
......@@ -176,27 +176,58 @@ vsec "completed housekeeping; parsing GTF"
####################################
# generate on the fly whippet index using mature bams
nsec "preparing cytoplasmic alignments for whippet indexing"
# Build a single deduplicated, indexed BAM from every cytoplasmic (mature)
# alignment; the result is what whippet-index.jl later receives via --bam.
# All expansions are quoted so directories containing spaces or glob
# characters do not get word-split.
matureSamMerge="${od}/matureSamMerge"
mkdir -p "${matureSamMerge}" 2>/dev/null
# first, merge the mature bams
# The shell glob replaces `ls *.bam`: parsing ls output mangles file names that
# contain whitespace. The ./ prefix keeps a name starting with '-' from being
# read as an option. If nothing matches, the literal pattern is passed through,
# samtools fails, and die is invoked.
cd "${cbamDir}" || die "cannot merge the cytoplasmic RNA alignments"
samtools merge -f -@ "${threadCount}" "${matureSamMerge}/merged.bam" ./*.bam 2>/dev/null || die "cannot merge the cytoplasmic RNA alignments"
# sort the merged bam (samtools writes the sorted BAM to stdout here)
samtools sort -@ "${threadCount}" "${matureSamMerge}/merged.bam" > "${matureSamMerge}/sorted.merged.bam" || die "cannot sort your bam"
# remove duplicates from the sorted bam
samtools rmdup -S "${matureSamMerge}/sorted.merged.bam" "${matureSamMerge}/unique.sorted.merged.bam" || die "cannot remove duplicates"
# index the deduplicated bam
samtools index -@ "${threadCount}" "${matureSamMerge}/unique.sorted.merged.bam" || die "cannot index your merged bam"
# merge bams
# Skips the merge when merged.bam already exists. The merge itself is written
# to a temporary name and renamed only on success, so a failed or interrupted
# run cannot leave a truncated merged.bam that the existence check would
# mistake for a finished merge on the next invocation.
vsec "preparing to merge mature alignments"
if [ -f "${matureSamMerge}/merged.bam" ]; then
  vsec "previous merged alignment found -- skipping merge step"
else
  cd "${cbamDir}" || die "cannot merge the cytoplasmic RNA alignments"
  # Glob instead of `ls *.bam`: ls output is word-split and breaks on names
  # with whitespace. The ./ prefix protects against names starting with '-'.
  {
    samtools merge -f -@ "${threadCount}" "${matureSamMerge}/merged.tmp.bam" ./*.bam 2>/dev/null \
      && mv -f "${matureSamMerge}/merged.tmp.bam" "${matureSamMerge}/merged.bam"
  } || die "cannot merge the cytoplasmic RNA alignments"
fi
# Report the three inputs used for whippet indexing.
# The bam line previously nested double quotes, which closed the string early
# and left the path expansion unquoted (word-split on whitespace); it is now a
# single quoted string that prints the same text.
ssec "fasta is ${myFasta}"
ssec "bam is ${matureSamMerge}/unique.sorted.merged.bam"
ssec "annotation is ${myAnnotation}"
# sort the merged bams
# Skips sorting when sorted.merged.bam already exists. The shell creates the
# redirect target before samtools even starts, so writing straight to the final
# name would leave an empty or truncated file after any failure and the next
# run would skip the sort. Output therefore goes to a temporary name and is
# renamed only once samtools exits successfully.
vsec "preparing to sort your merged mature alignments"
if [ -f "${matureSamMerge}/sorted.merged.bam" ]; then
  vsec "previous sorted alignment found -- skipping sort step"
else
  {
    samtools sort -@ "${threadCount}" "${matureSamMerge}/merged.bam" > "${matureSamMerge}/sorted.merged.tmp.bam" \
      && mv -f "${matureSamMerge}/sorted.merged.tmp.bam" "${matureSamMerge}/sorted.merged.bam"
  } || die "cannot sort your bam"
fi
# remove duplicates from sorted, merged bam
# samtools is poorly optimized; the intended approach is to
#   1. split the bam into chromosomes,
#   2. remove duplicates per chromosome,
#   3. remerge everything.
# NOTE: that split/remerge is not implemented here -- the whole sorted bam is
# deduplicated by a single `samtools rmdup -S` call.
# The result is written to a temporary name and renamed only on success so a
# failed run cannot leave a partial unique.sorted.merged.bam that the existence
# check would treat as complete.
vsec "preparing to removed duplicates from sorted + merged alignments"
if [ -f "${matureSamMerge}/unique.sorted.merged.bam" ]; then
  vsec "previous unique alignment found -- skipping deduplication step"
else
  {
    samtools rmdup -S "${matureSamMerge}/sorted.merged.bam" "${matureSamMerge}/unique.sorted.merged.tmp.bam" \
      && mv -f "${matureSamMerge}/unique.sorted.merged.tmp.bam" "${matureSamMerge}/unique.sorted.merged.bam"
  } || die "cannot remove duplicates"
fi
# index the unique bam
# The existing index is reused only when it is not older than the bam it
# belongs to; a .bai left over from a previous bam that has since been
# regenerated is rebuilt instead of being silently reused.
vsec "indexing sunique alignments"
if [ -f "${matureSamMerge}/unique.sorted.merged.bam.bai" ] \
  && [ ! "${matureSamMerge}/unique.sorted.merged.bam.bai" -ot "${matureSamMerge}/unique.sorted.merged.bam" ]; then
  vsec "previous index found -- skipping indexing step"
else
  samtools index -@ "${threadCount}" "${matureSamMerge}/unique.sorted.merged.bam" || die "cannot index your merged bam"
fi
### testing
### tell me the files we're using for the whippet indexing step
# The bam line previously nested double quotes, which closed the string early
# and left the path expansion unquoted (word-split on whitespace); it is now a
# single quoted string that prints the same text.
vsec "notifications incoming:"
vsec "fasta is ${myFasta}"
vsec "bam is ${matureSamMerge}/unique.sorted.merged.bam"
vsec "annotation is ${myAnnotation}"
# ask whippet to make an index while recognizing splice junctions from our bam
# Recreates the index directory contents from scratch, then runs
# whippet-index.jl on the fasta, the deduplicated bam and the GTF annotation.
# NOTE(review): the index directory is hard-coded to one user's project space;
# consider deriving it from an option or ${od} -- confirm with the author.
wptIdx="/g/data/lf10/as7425/SRscan/index-whippet"
mkdir -p "${wptIdx}" 2>/dev/null
cd "${wptIdx}" || die "cannot access whippet index directory"
# Clear any previous index. ${wptIdx:?} aborts rather than expanding to "/*"
# should the variable ever be empty; -- stops option parsing for odd names.
rm -rf -- "${wptIdx:?}"/*
julia "${whippetPath}/whippet-index.jl" --fasta "${myFasta}" --bam "${matureSamMerge}/unique.sorted.merged.bam" --gtf "${myAnnotation}" --bam-min-reads 3 --index "${wptIdx}" || die "canot make whippet index"
# NOTE(review): this unconditional exit ends the script here, so nothing after
# this section runs -- confirm it is intentional and not a testing leftover.
exit 0
####################################
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment