-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmark_duplicates.sh
More file actions
36 lines (28 loc) · 1.09 KB
/
mark_duplicates.sh
File metadata and controls
36 lines (28 loc) · 1.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
# PBS job script: marks duplicates in one BAM file with Picard MarkDuplicates
# and then indexes the resulting BAM with samtools.
#
# The next line does not start with "#PBS", so it is an ordinary comment.
# NOTE(review): it looks like an earlier, smaller resource request kept for
# reference — confirm whether it can be deleted.
######-l mem=16gb,walltime=24:00:00,ncpus=8
# Active scheduler directives: select=1 with ncpus=32 and mem=124gb, and a
# walltime of 24:00:00.
#PBS -lselect=1:ncpus=32:mem=124gb
#PBS -lwalltime=24:00:00
# Load the pinned tool versions the commands below rely on:
# samtools (header inspection, indexing), Picard (MarkDuplicates) and the
# Java runtime used to launch picard.jar.
module load samtools/1.3.1
module load picard/2.6.0
module load java/jdk-8u66
# Input parameter to this file should be bamlist,
# which is a space-separated string of all bam files
# for processing. It is read from the environment.
#
# PBS_ARRAY_INDEX picks the entry this job handles (index 0 is the first
# file in bamlist). When it is unset or empty the first file is used; this
# makes explicit what the empty array subscript evaluated to before.
#
# The job stops early with a message on stderr when the selection is empty
# (empty bamlist or index past the end) or the selected file is unreadable,
# instead of handing an empty path to samtools and Picard.

# Word splitting of bamlist is intentional: it turns the string into one
# array element per file.
# shellcheck disable=SC2206
bams=( ${bamlist} )
bam_index=${PBS_ARRAY_INDEX:-0}
bam=${bams[$bam_index]}

if [[ -z "${bam}" ]]; then
  echo "mark_duplicates: no BAM file at index ${bam_index} (bamlist holds ${#bams[@]} entries)" >&2
  exit 1
fi
if [[ ! -r "${bam}" ]]; then
  echo "mark_duplicates: cannot read BAM file '${bam}'" >&2
  exit 1
fi
# Set threshold for optical duplicates based on pixel density according to Illumina pipeline
#######################################
# Choose the OPTICAL_DUPLICATE_PIXEL_DISTANCE value from a SAM header.
# Arguments: none
# Inputs:    SAM/BAM header text on stdin
# Outputs:   "10" on stdout when an @PG line records a command line
#            beginning "CL:bwa samse", otherwise "100"
# Returns:   0
#######################################
optical_pixel_distance() {
  # A single grep replaces the former egrep | perl look-behind pipeline:
  # the line must start with @PG and contain the text "CL:bwa samse".
  if grep -q '^@PG.*CL:bwa samse'; then
    #//Older pipelines have smaller tile coordinates//
    printf '10\n'
  else
    printf '100\n'
  fi
}

# ODP is consumed by the MarkDuplicates call further down. If the header
# cannot be read, nothing matches and the default of 100 is used.
ODP=$(samtools view -H "${bam}" | optical_pixel_distance)
# Run Picard MarkDuplicates on the selected BAM, then index the result.
#   Output BAM : ${bam} with the ".raw.bam" suffix replaced by ".mkdup.bam"
#   Metrics    : ${bam} with only ".bam" stripped, plus ".mkdup.metrics"
#                (so a ".raw" part of the name is kept)
#   Scratch dir: MARKDUP_TMP_DIR if set in the environment, otherwise the
#                original hard-coded project directory
# The job exits with the failing tool's status if either step fails, so a
# failed MarkDuplicates run is never followed by an index attempt.
mkdup_bam="${bam%.raw.bam}.mkdup.bam"
mkdup_tmp="${MARKDUP_TMP_DIR:-/rds/general/user/tsewell/projects/fisher-bd-results/live/markup_tmp}"

# Arguments are kept in an array so each KEY=value stays a single word even
# if a path contains whitespace.
picard_args=(
  VALIDATION_STRINGENCY=LENIENT
  OPTICAL_DUPLICATE_PIXEL_DISTANCE="$ODP"
  TMP_DIR="$mkdup_tmp"
  MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000
  INPUT="$bam"
  OUTPUT="$mkdup_bam"
  METRICS_FILE="${bam%.bam}.mkdup.metrics"
)

java -Xmx16g -jar "${PICARD_HOME}/picard.jar" MarkDuplicates "${picard_args[@]}" || {
  status=$?
  echo "mark_duplicates: Picard MarkDuplicates failed for '${bam}' (exit ${status})" >&2
  exit "${status}"
}

# Re-index new bam file...
samtools index "$mkdup_bam" || {
  status=$?
  echo "mark_duplicates: samtools index failed for '${mkdup_bam}' (exit ${status})" >&2
  exit "${status}"
}