Commit 290fe98d authored by A.J. Sethi's avatar A.J. Sethi

wrote new launcher script

parent e88698ee
# ClaiRO
ClaiRO is a low-bias method to detect and quantify exon-defining units from paired-end RNA-Seq alignments. ClaiRO searches for read-pairs from cDNA fragments which span internal exons. We refer to these read-pairs as exon-defining units (ExoDUs).
* Exon-defining units (ExoDU) are cDNA fragments which span internal exons on protein-coding transcripts. Paired-end sequencing alignments contain numerous read-pairs which span ExoDU, termed exon-defining unit spanners (ExoDUs).
* ClaiRO searches for ExoDUs. ClaiRO is a low-bias method to detect and quantify exon-defining units from paired-end RNA-Seq alignments.
![Alt text](https://git.nci.org.au/as7425/ClaiRO/uploads/b6ae37839335a888101923565a7080cb/Screen_Shot_2020-10-18_at_4.38.50_pm.png?raw=true "Exon-defining units")
# Dependencies
# Dependencies
ClaiRO relies on several packages. Please ensure these are available within your PATH.
ClaiRO has been designed for use with the following packages. Please ensure these are available within your PATH:
* Bedtools 2.29.2+ (Quinlan, 2014)
* Samtools 1.11.1+ (Li et al., 2009)
* Picard 2.23+ (Broad Institute, 2019)
* Bedtools 2.29.2 (Quinlan, 2014)
* GNU Parallel 20191122 (Tange, 2018)
* Picard 2.23.8 (Broad Institute, 2019)
* Regtools 0.5.2 (Feng et al., 2018)
* Samtools 1.11.1 (Li et al., 2009)
* Subread FeatureCounts 2.0.1 (Liao and Shi, 2019)
* GNU Parallel 20191122+ (Tange, 2018)
* Whippet 0.11.1 (Sterne-Weiler et al., 2018)
# Installation
ClaiRO is a collection of scripts and does not require installation directly.
To clone into the repository, use the following command:
# Installation
ClaiRO is a collection of scripts and does not require installation directly.
To clone into the repository, use the following command:
`$ git clone --recursive https://git.nci.org.au/as7425/ClaiRO.git`
......@@ -26,4 +31,3 @@ ClaiRO runs on unix-like environments.
# Usage
to be confirmed
......@@ -2,6 +2,42 @@
# written by A.J. Sethi on 2020-09-06
# prepend the site-local regtools build directory to PATH
# NOTE(review): `die` is only defined further down this script, so if this export ever failed the fallback would hit "command not found" — confirm the intended ordering
export PATH="/home/150/as7425/apps/regtools/build:$PATH" || die "cannot add regtools to path"
## structure:
# arg1: path to bam
# arg2: path to sj_out
# NOTE(review): unlike the later extract call, this invocation names no input alignment, and myWorkingDir is not assigned anywhere in the visible script — presumably a superseded line; confirm before relying on it
regtools junctions extract -a 4 -m 25 -M 1000000 -o ${myWorkingDir}/sj.bed -s 0
# define the die function
# die MESSAGE...
#   Prints "<date>\t<time>\t[scriptDied] because it MESSAGE" to stdout and
#   terminates the current shell with exit status 1.
#   The message is handed to printf as an argument (%b) rather than spliced
#   into the format string, so a literal '%' in a message or path can no
#   longer corrupt the output; backslash escapes in the message are still
#   expanded, as they were before.
#   Exported so that child bash processes can call it too.
die() {
    printf '%s\t%s\t[scriptDied] because it %b\n' "$(date +%F)" "$(date +%T)" "$*"
    exit 1
}
export -f die
## check arguments
# check the binary alignment ($1)
# `samtools view -c` prints the number of alignment records directly, so the
# exit status seen by `|| die` is samtools' own (previously it was the status
# of `wc -l`, which never fails, so an unreadable file was never reported here)
samCounts=$(samtools view -c "${1}") || die "cannot view $1"
[ "${samCounts}" -gt "0" ] || die "cannot find reads in your alignment $1"
# check the output directory ($2)
export outputDir="${2}"
# mkdir errors are silenced on purpose; the -d test below reports the failure
mkdir -p "${outputDir}" 2>/dev/null
# quoted: with a missing $2 the unquoted form collapses to `[ -d ]`, which is true
[ -d "${outputDir}" ] || die "cannot access output directory"
printf '%s\n' "${outputDir}"
## check binaries
# checkAvailable NAME
#   Succeeds only when NAME resolves to an executable file via a PATH search
#   (type -P forces the PATH lookup); prints nothing either way.
checkAvailable() {
    builtin type -P "$1" > /dev/null 2>&1
}
# see if regtools is available
checkAvailable "regtools" || die "cannot access regtools"
# see if samtools is available
checkAvailable "samtools" || die "cannot access samtools"
## use regtools to extract splice-junctions from our binary
# reference files used for junction annotation (site-specific absolute paths)
referenceFasta="/g/data/lf10/as7425/genomes/human_genome/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
referenceGtf="/g/data/lf10/as7425/genomes/human_genome/Homo_sapiens.GRCh38.100.chr.gtf"
# step A: extract junctions from the alignment ($1) into a.txt, then annotate them into b.txt;
# each step now aborts on failure instead of reporting progress over missing output
regtools junctions extract -a 4 -m 25 -M 1000000 -o "${outputDir}/a.txt" -s 0 "${1}" \
    || die "cannot extract splice junctions from ${1}"
# stderr of the annotate step is discarded, as before
regtools junctions annotate -o "${outputDir}/b.txt" "${outputDir}/a.txt" "${referenceFasta}" "${referenceGtf}" 2>/dev/null \
    || die "cannot annotate the extracted splice junctions"
echo "done with A"
# keep columns 1-3 and 5 of the annotated junctions
cut -f1-3,5 "${outputDir}/b.txt" > "${outputDir}/c.txt" && echo "done"
......@@ -90,7 +90,7 @@ do
echo "doing runmode test ${OPTARG}"
if [[ ${OPTARG} == "intron" ]]; then
export MODE="intron"
echo "shrek"
else die "could not parse the user provided runmode, ${OPTARG}"; fi
;;
......
#!/bin/bash
# written by A.J. Sethi on 2020-10-18
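# submit with;
# -a /path/to/folder/with/annotations (i.e. exactly one fasta + one gtf)
# -b /path/to/folder/with/bam
# -o /path/to/output/directory (created if it does not exist)
# -t preferred threadcount (integer; defaults to 8)
# -m runmode (currently only "intron" is accepted; required)
# -v TRUE to enable verbose logging (optional)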
####################################
# admin functions
# die function
# die MESSAGE...
#   Prints "<date>\t<time>\t[scriptDied] characterise-ExoDUs.sh died because it MESSAGE"
#   to stdout and terminates the current shell with exit status 1.
#   The message is a printf argument (%b), not part of the format string, so
#   a '%' inside a message or user-supplied path is printed literally;
#   backslash escapes in the message are still expanded, as before.
#   Exported so that child bash processes can call it too.
die() {
    printf '%s\t%s\t[scriptDied] characterise-ExoDUs.sh died because it %b\n' "$(date +%F)" "$(date +%T)" "$*"
    exit 1
}
export -f die
# new section notification
# nsec MESSAGE...
#   Prints "<date>\t<time>\tMESSAGE" to stdout.
#   The message is a printf argument (%b), so a literal '%' is printed as-is
#   while backslash escapes such as \t are still expanded.
nsec() {
    printf '%s\t%s\t%b\n' "$(date +%F)" "$(date +%T)" "$*"
}
export -f nsec
# subsection notification
# ssec MESSAGE...
#   Prints "<date>\t<time>\t\t> MESSAGE" to stdout (one level deeper than nsec).
#   The message is a printf argument (%b), so a literal '%' is printed as-is
#   while backslash escapes are still expanded.
ssec() {
    printf '%s\t%s\t\t> %b\n' "$(date +%F)" "$(date +%T)" "$*"
}
export -f ssec
# verbose-subsection
# vsec MESSAGE...
#   Same output format as a subsection notification, but only emitted when
#   the VERBOSE variable equals "TRUE"; otherwise prints nothing and succeeds.
#   The message is a printf argument (%b), so a literal '%' is printed as-is
#   while backslash escapes are still expanded.
vsec() {
    if [ "${VERBOSE}" == "TRUE" ]; then
        printf '%s\t%s\t\t> %b\n' "$(date +%F)" "$(date +%T)" "$*"
    fi
}
export -f vsec
####################################
# housekeeping
# announce start-up as a timestamped top-level log line
nsec "ClaiRO initiated"
# test that all the requisite packages are available in path
# checkAvailable NAME
#   Returns the status of a forced PATH lookup for NAME: zero when an
#   executable file is found, non-zero otherwise. Produces no output.
checkAvailable() {
    local -r candidate=$1
    builtin type -P "${candidate}" > /dev/null 2>&1
}
# abort early unless every external tool the launcher relies on is on PATH
for targetMod in samtools bedtools parallel java julia
do
    if ! checkAvailable "${targetMod}"; then
        die "cannot locate package $targetMod in user PATH"
    fi
done
# defaults, exported for child processes; option parsing may override them
#   threadCount  threads to use when -t is not supplied
#   bamCount     number of alignments detected so far (none yet)
#   MODE         "die" is a sentinel meaning "no runmode chosen"; the
#                validation step refuses to continue while it is still set
threadCount="8"
bamCount=0
MODE="die"
export threadCount bamCount MODE
# process the input arguments
# Options (every option takes a value):
#   -a DIR   annotation directory; must end up holding exactly one fasta
#            (*.fasta, *.fa or *.fna) and exactly one *.gtf; *.gz files in it
#            are unzipped first
#   -b DIR   directory holding the binary alignments (*.bam)
#   -o DIR   output directory, created when missing
#   -t N     thread count, a positive integer
#   -m MODE  runmode; only "intron" is accepted
#   -v FLAG  "TRUE" switches verbose logging on, anything else switches it off
# Words that are not options are appended to ARGS. getopts is restarted after
# each such word so that options and plain words may be interleaved.
ARGS=""
while [ $# -gt 0 ]
do
    OPTIND=1
    unset OPTARG
    while getopts a:b:o:t:v:m: options
    do
        case "${options}" in
        a) # annotation
            [ -d "${OPTARG}" ] || die "cannot fetch input files"
            export input="${OPTARG}"
            # unzip any .gz files if they are present (only the .gz files, and
            # in the annotation directory itself rather than in "$1", which is
            # still the first command-line word while getopts is running)
            gzFiles=("${input}"/*.gz)
            [ -e "${gzFiles[0]}" ] || gzFiles=() # an unmatched glob stays literal
            gunzipCount=${#gzFiles[@]}
            if [ "${gunzipCount}" -gt 0 ]; then
                ssec "unzipping ${gunzipCount} files in ${input}"
                gunzipPids=()
                for i in "${gzFiles[@]}"; do
                    gunzip "${i}" &
                    gunzipPids+=("$!")
                done
                # wait on each job separately: a bare `wait` discards their exit statuses
                for i in "${gunzipPids[@]}"; do
                    wait "${i}" || die "cannot unzip the .gz files in ${input}"
                done
            fi
            # get a list of files in the primary input (after unzipping)
            annotationFiles=("${input}"/*.*)
            [ -e "${annotationFiles[0]}" ] || die "cannot fetch input files"
            inputFiles=$(printf '%s\n' "${annotationFiles[@]}")
            # classify by filename suffix; counting array members gives a true
            # per-file count and ignores index files such as *.fa.fai
            fastaFiles=()
            gtfFiles=()
            for i in "${annotationFiles[@]}"; do
                case "${i}" in
                *.fasta | *.fa | *.fna) fastaFiles+=("${i}") ;;
                *.gtf) gtfFiles+=("${i}") ;;
                esac
            done
            # check that exactly one fasta of some sort is present
            fastaCount=${#fastaFiles[@]}
            [ "${fastaCount}" -eq "1" ] || die "did not identify a single input fasta in ${input}"
            export myFasta="${fastaFiles[0]}"
            # check that a single gtf is present
            GTFCount=${#gtfFiles[@]}
            [ "${GTFCount}" -eq "1" ] || die "did not identify a single input gtf in ${input}"
            export myAnnotation="${gtfFiles[0]}"
            ;;
        b) # binary alignments
            [ -d "${OPTARG}" ] || die "cannot fetch input files"
            export bamDir="${OPTARG}"
            # get a list of files in the alignment directory
            alignmentDirFiles=("${bamDir}"/*.*)
            [ -e "${alignmentDirFiles[0]}" ] || die "cannot fetch input files"
            bamList=$(printf '%s\n' "${alignmentDirFiles[@]}")
            # count the *.bam files themselves (index files such as *.bam.bai do not count)
            bamFiles=("${bamDir}"/*.bam)
            [ -e "${bamFiles[0]}" ] || bamFiles=()
            bamCount=${#bamFiles[@]}
            [ "${bamCount}" -gt "0" ] || die "no binary alignments found in ${bamDir}"
            ;;
        o) # output directory
            export outputDirectory="${OPTARG}"
            # mkdir errors are silenced on purpose; the -d test reports the failure
            mkdir -p "${outputDirectory}" 2>/dev/null
            [ -d "${outputDirectory}" ] || die "cannot access the user-specified output directory, '${outputDirectory}'"
            ;;
        t) # threadCount
            if [ -n "${OPTARG}" ]; then
                # positive integers only; zero and negative counts are rejected
                [[ "${OPTARG}" =~ ^0*[1-9][0-9]*$ ]] || die "detected that user supplied invalid threadcount"
                export threadCount="${OPTARG}"
                ssec "set threadcount to ${threadCount}"
            else
                # reached only when -t is given an explicitly empty value
                export threadCount="4"
                ssec "setting threadCount to 4 by default"
            fi
            ;;
        m) # runmode; test if we want to run the entire index or just a specific region
            echo "doing runmode test ${OPTARG}"
            if [[ "${OPTARG}" == "intron" ]]; then
                export MODE="intron"
            else
                die "could not parse the user provided runmode, ${OPTARG}"
            fi
            ;;
        v) # verbosity
            if [ "${OPTARG}" == "TRUE" ]; then
                export VERBOSE="TRUE"
                ssec "Note: ClaiRO will proceed in verbose mode"
            else
                export VERBOSE="false"
            fi
            ;;
        \?) # getopts has already printed its own diagnostic for the offending word
            die "received an unknown option or an option without its value"
            ;;
        esac
    done
    shift $((OPTIND-1))
    # collect one non-option word, if any is left, then resume option parsing
    if [ $# -gt 0 ]; then
        ARGS="${ARGS} $1 "
        shift
    fi
done
### validate the input arguments
# validate the annotation
# (quoted on purpose: when -a was never given the variables are unset, and an
#  unquoted empty expansion turns the test into `[ -f ]`, which is true)
if [ ! -f "${myAnnotation}" ] || [ ! -f "${myFasta}" ]; then
    die "user supplied invalid annotation"
fi
vsec "your annotation is ${myAnnotation}"
vsec "your reference sequence is ${myFasta}"
# validate the binary alignments
[ "${bamCount}" -gt "0" ] || die "did not detect any bam files in the input"
ssec "detected ${bamCount} bam files"
# validate output directory
[ -d "${outputDirectory}" ] || die "cannot access your output directory"
vsec "Your output directory is ${outputDirectory}"
# validate threadCount
ssec "proceeding with ${threadCount} threads"
# validate the runmode ("die" is the initial sentinel, i.e. -m was never given)
if [ "${MODE}" == "die" ]; then
    die "user did not define runmode"
fi
ssec "your runmode is ${MODE}"
vsec "completed housekeeping; parsing GTF"
####################################################################################################
####################################################################################################
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment