Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PROGRAM_NAME = sickle
VERSION = 1.33
CC = gcc
CFLAGS = -Wall -pedantic -DVERSION=$(VERSION)
CFLAGS = -Wall -pedantic -std=c99 -DVERSION=$(VERSION)
DEBUG = -g
OPT = -O3
ARCHIVE = $(PROGRAM_NAME)_$(VERSION)
Expand Down
4 changes: 3 additions & 1 deletion src/sickle.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ exit(EXIT_SUCCESS); \
break;
/* end code drawn from system.h */

#define STDERR_OR_OUT(status) (status == EXIT_SUCCESS ? stdout : stderr)

typedef enum {
PHRED,
SANGER,
Expand Down Expand Up @@ -96,6 +98,6 @@ typedef struct __cutsites_ {
/* Function Prototypes */
int single_main (int argc, char *argv[]);
int paired_main (int argc, char *argv[]);
cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug);
cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int window_size, int no_fiveprime, int trunc_n, int debug);

#endif /*SICKLE_H*/
11 changes: 6 additions & 5 deletions src/sliding.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ int get_quality_num (char qualchar, int qualtype, kseq_t *fqrec, int pos) {
}


cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug) {
cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int window_size, int no_fiveprime, int trunc_n, int debug) {

int window_size = (int) (0.1 * fqrec->seq.l);
if (!window_size)
window_size = (int) (0.1 * fqrec->seq.l);
int i,j;
int window_start=0;
int window_total=0;
Expand All @@ -53,9 +54,9 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int
return (retvals);
}

/* if the seq length is less then 10bp, */
/* if the seq length is less then 10bp or the fixed window size is larger than the sequence length, */
/* then make the window size the length of the seq */
if (window_size == 0) window_size = fqrec->seq.l;
if (window_size == 0 || window_size > fqrec->seq.l) window_size = fqrec->seq.l;

for (i=0; i<window_size; i++) {
window_total += get_quality_num (fqrec->qual.s[i], qualtype, fqrec, i);
Expand All @@ -65,7 +66,7 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int

window_avg = (double)window_total / (double)window_size;

if (debug) printf ("no_fiveprime: %d, found 5prime: %d, window_avg: %f\n", no_fiveprime, found_five_prime, window_avg);
if (debug) printf ("no_fiveprime: %d, found 5prime: %d, window_avg: %f, window_size: %d\n", no_fiveprime, found_five_prime, window_avg, window_size);

/* Finding the 5' cutoff */
/* Find when the average quality in the window goes above the threshold starting from the 5' end */
Expand Down
154 changes: 103 additions & 51 deletions src/trim_paired.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,62 +16,105 @@ __KSEQ_READ
int paired_qual_threshold = 20;
int paired_length_threshold = 20;

static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:w:xng";
static struct option paired_long_options[] = {
{"qual-type", required_argument, 0, 't'},
{"pe-file1", required_argument, 0, 'f'},
{"pe-file2", required_argument, 0, 'r'},
{"pe-combo", required_argument, 0, 'c'},
{"output-pe1", required_argument, 0, 'o'},
{"output-pe2", required_argument, 0, 'p'},
{"output-single", required_argument, 0, 's'},
{"output-combo", required_argument, 0, 'm'},
{"qual-threshold", required_argument, 0, 'q'},
{"length-threshold", required_argument, 0, 'l'},
{"no-fiveprime", no_argument, 0, 'x'},
{"truncate-n", no_argument, 0, 'n'},
{"gzip-output", no_argument, 0, 'g'},
{"output-combo-all", required_argument, 0, 'M'},
{"quiet", no_argument, 0, 'z'},
{ "qual-type", required_argument, 0, 't' },
{ "pe-file1", required_argument, 0, 'f' },
{ "pe-file2", required_argument, 0, 'r' },
{ "pe-combo", required_argument, 0, 'c' },
{ "output-pe1", required_argument, 0, 'o' },
{ "output-pe2", required_argument, 0, 'p' },
{ "output-single", required_argument, 0, 's' },
{ "output-combo", required_argument, 0, 'm' },
{ "qual-threshold", required_argument, 0, 'q' },
{ "length-threshold", required_argument, 0, 'l' },
{ "window", required_argument, 0, 'w' },
{ "no-fiveprime", no_argument, 0, 'x' },
{ "truncate-n", no_argument, 0, 'n' },
{ "gzip-output", no_argument, 0, 'g' },
{ "output-combo-all", required_argument, 0, 'M' },
{ "quiet", no_argument, 0, 'z' },
{ "debug", no_argument, 0, 'd' },
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
};

void paired_usage (int status, char *msg) {
static const char *usage_format =
"\n"
"If you have separate files for forward and reverse reads:\n"
" Usage: %1$s pe [options] -f <paired-end forward fastq file>\n"
" %2$*3$s -r <paired-end reverse fastq file>\n"
" %2$*3$s -t <quality type>\n"
" %2$*3$s -o <trimmed PE forward file>\n"
" %2$*3$s -p <trimmed PE reverse file>\n"
" %2$*3$s -s <trimmed singles file>\n"
"\n"
"If you have one file with interleaved forward and reverse reads:\n"
" Usage: %1$s pe [options] -c <interleaved input file>\n"
" %2$*3$s -t <quality type>\n"
" %2$*3$s -m <interleaved trimmed paired-end output>\n"
" %2$*3$s -s <trimmed singles file>\n"
"\n"
"If you have one file with interleaved reads as input and you want\n"
"ONLY one interleaved file as output:\n"
" Usage: %1$s pe [options] -c <interleaved input file>\n"
" %2$*3$s -t <quality type>\n"
" %2$*3$s -M <interleaved trimmed output>\n"
"\n"
"Options:\n"
"\n"
"Paired-end separated reads\n"
"--------------------------\n"
"-f, --pe-file1 Input paired-end forward fastq file\n"
"-r, --pe-file2 Input paired-end reverse fastq file\n"
" (input files must have same number of records)\n"
"-o, --output-pe1 Output trimmed forward fastq file\n"
"-p, --output-pe2 Output trimmed reverse fastq file. Must use -s option.\n"
"\n"
"Paired-end interleaved reads\n"
"----------------------------\n"
"-c, --pe-combo Combined (interleaved) input paired-end fastq\n"
"-m, --output-combo Output combined (interleaved) paired-end fastq file.\n"
" Must use -s option.\n"
"-M, --output-combo-all Output combined (interleaved) paired-end fastq file\n"
" with any discarded read written to output file as a\n"
" single N. Cannot be used with the -s option.\n"
"\n"
"Global options\n"
"--------------\n"
"-t TYPE, --qual-type TYPE Type of quality values, one of:\n"
" solexa (CASAVA < 1.3)\n"
" illumina (CASAVA 1.3 to 1.7)\n"
" sanger (which is CASAVA >= 1.8)\n"
" (required)\n"
"-s FILE, --output-single FILE Output trimmed singles fastq file\n"
"-q #, --qual-threshold # Threshold for trimming based on average quality\n"
" in a window. Default %3$d.\n"
"-l #, --length-threshold # Threshold to keep a read based on length after\n"
" trimming. Default %4$d.\n"
"-w #, --window # Fixed window size to use. Default is a dynamic\n"
" window size of 0.1 of the read length.\n"
"-x, --no-fiveprime Don't do five prime trimming.\n"
"-n, --truncate-n Truncate sequences at position of first N.\n"
"-g, --gzip-output Output gzipped files.\n"
"--quiet Do not output trimming info\n"
"--help Display this help and exit\n"
"--version Output version information and exit\n"
;

if (msg) fprintf( STDERR_OR_OUT(status), "%s\n", msg );

fprintf(
STDERR_OR_OUT(status),
usage_format,
PROGRAM_NAME,
"", strlen(PROGRAM_NAME) + 3, // +3 makes it possible to keep text visually aligned in the format string itself
paired_qual_threshold,
paired_length_threshold
);

fprintf(stderr, "\nIf you have separate files for forward and reverse reads:\n");
fprintf(stderr, "Usage: %s pe [options] -f <paired-end forward fastq file> -r <paired-end reverse fastq file> -t <quality type> -o <trimmed PE forward file> -p <trimmed PE reverse file> -s <trimmed singles file>\n\n", PROGRAM_NAME);
fprintf(stderr, "If you have one file with interleaved forward and reverse reads:\n");
fprintf(stderr, "Usage: %s pe [options] -c <interleaved input file> -t <quality type> -m <interleaved trimmed paired-end output> -s <trimmed singles file>\n\n\
If you have one file with interleaved reads as input and you want ONLY one interleaved file as output:\n\
Usage: %s pe [options] -c <interleaved input file> -t <quality type> -M <interleaved trimmed output>\n\n", PROGRAM_NAME, PROGRAM_NAME);
fprintf(stderr, "Options:\n\
Paired-end separated reads\n\
--------------------------\n\
-f, --pe-file1, Input paired-end forward fastq file (Input files must have same number of records)\n\
-r, --pe-file2, Input paired-end reverse fastq file\n\
-o, --output-pe1, Output trimmed forward fastq file\n\
-p, --output-pe2, Output trimmed reverse fastq file. Must use -s option.\n\n\
Paired-end interleaved reads\n\
----------------------------\n");
fprintf(stderr,"-c, --pe-combo, Combined (interleaved) input paired-end fastq\n\
-m, --output-combo, Output combined (interleaved) paired-end fastq file. Must use -s option.\n\
-M, --output-combo-all, Output combined (interleaved) paired-end fastq file with any discarded read written to output file as a single N. Cannot be used with the -s option.\n\n\
Global options\n\
--------------\n\
-t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n");
fprintf(stderr, "-s, --output-single, Output trimmed singles fastq file\n\
-q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\
-l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\
-x, --no-fiveprime, Don't do five prime trimming.\n\
-n, --truncate-n, Truncate sequences at position of first N.\n");


fprintf(stderr, "-g, --gzip-output, Output gzipped files.\n--quiet, do not output trimming info\n\
--help, display this help and exit\n\
--version, output version information and exit\n\n");

if (msg) fprintf(stderr, "%s\n\n", msg);
exit(status);
}

Expand Down Expand Up @@ -118,10 +161,11 @@ int paired_main(int argc, char *argv[]) {
int combo_all=0;
int combo_s=0;
int total=0;
int window_size = 0;

while (1) {
int option_index = 0;
optc = getopt_long(argc, argv, "df:r:c:t:o:p:m:M:s:q:l:xng", paired_long_options, &option_index);
optc = getopt_long(argc, argv, paired_short_options, paired_long_options, &option_index);

if (optc == -1)
break;
Expand Down Expand Up @@ -198,6 +242,14 @@ int paired_main(int argc, char *argv[]) {
}
break;

case 'w':
window_size = atoi(optarg);
if (window_size < 1) {
fprintf(stderr, "Fixed window size must be >= 1\n");
return EXIT_FAILURE;
}
break;

case 'x':
no_fiveprime = 1;
break;
Expand Down Expand Up @@ -374,8 +426,8 @@ int paired_main(int argc, char *argv[]) {
break;
}

p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug);
p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug);
p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, window_size, no_fiveprime, trunc_n, debug);
p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, window_size, no_fiveprime, trunc_n, debug);
total += 2;

if (debug) printf("p1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);
Expand Down
91 changes: 63 additions & 28 deletions src/trim_single.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,40 +15,66 @@ __KSEQ_READ
int single_qual_threshold = 20;
int single_length_threshold = 20;

static const char *single_short_options = "df:t:o:q:l:w:zxng";
static struct option single_long_options[] = {
{"fastq-file", required_argument, 0, 'f'},
{"output-file", required_argument, 0, 'o'},
{"qual-type", required_argument, 0, 't'},
{"qual-threshold", required_argument, 0, 'q'},
{"length-threshold", required_argument, 0, 'l'},
{"no-fiveprime", no_argument, 0, 'x'},
{"discard-n", no_argument, 0, 'n'},
{"gzip-output", no_argument, 0, 'g'},
{"quiet", no_argument, 0, 'z'},
{ "fastq-file", required_argument, 0, 'f' },
{ "output-file", required_argument, 0, 'o' },
{ "qual-type", required_argument, 0, 't' },
{ "qual-threshold", required_argument, 0, 'q' },
{ "length-threshold", required_argument, 0, 'l' },
{ "no-fiveprime", no_argument, 0, 'x' },
{ "window", required_argument, 0, 'w' },
{ "trunc-n", no_argument, 0, 'n' },
{ "truncate-n", no_argument, 0, 'n' },
{ "gzip-output", no_argument, 0, 'g' },
{ "quiet", no_argument, 0, 'z' },
{ "debug", no_argument, 0, 'd' },
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
};

void single_usage(int status, char *msg) {
static const char *usage_format =
"\n"
"Usage: %1$s se [options] -f <fastq sequence file>\n"
" %2$*3$s -t <quality type>\n"
" %2$*3$s -o <trimmed fastq file>\n"
"\n"
"Options:\n"
"\n"
"-f FILE, --fastq-file FILE Input fastq file (required)\n"
"-o FILE, --output-file FILE Output trimmed fastq file (required)\n"
"-t TYPE, --qual-type TYPE Type of quality values, one of:\n"
" solexa (CASAVA < 1.3)\n"
" illumina (CASAVA 1.3 to 1.7)\n"
" sanger (which is CASAVA >= 1.8)\n"
" (required)\n"
"-q #, --qual-threshold # Threshold for trimming based on average quality\n"
" in a window. Default %3$d.\n"
"-l #, --length-threshold # Threshold to keep a read based on length after\n"
" trimming. Default %4$d.\n"
"-w #, --window # Fixed window size to use. Default is a dynamic\n"
" window size of 0.1 of the read length.\n"
"-x, --no-fiveprime Don't do five prime trimming.\n"
"-n, --truncate-n Truncate sequences at position of first N.\n"
"-g, --gzip-output Output gzipped files.\n"
"--quiet Do not output trimming info\n"
"--help Display this help and exit\n"
"--version Output version information and exit\n"
;

if (msg) fprintf( STDERR_OR_OUT(status), "%s\n", msg );

fprintf(
STDERR_OR_OUT(status),
usage_format,
PROGRAM_NAME,
"", strlen(PROGRAM_NAME) + 3, // +3 makes it possible to keep text visually aligned in the format string itself
single_qual_threshold,
single_length_threshold
);

fprintf(stderr, "\nUsage: %s se [options] -f <fastq sequence file> -t <quality type> -o <trimmed fastq file>\n\
\n\
Options:\n\
-f, --fastq-file, Input fastq file (required)\n\
-t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n\
-o, --output-file, Output trimmed fastq file (required)\n", PROGRAM_NAME);

fprintf(stderr, "-q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\
-l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\
-x, --no-fiveprime, Don't do five prime trimming.\n\
-n, --trunc-n, Truncate sequences at position of first N.\n\
-g, --gzip-output, Output gzipped files.\n\
--quiet, Don't print out any trimming information\n\
--help, display this help and exit\n\
--version, output version information and exit\n\n");

if (msg) fprintf(stderr, "%s\n\n", msg);
exit(status);
}

Expand All @@ -73,10 +99,11 @@ int single_main(int argc, char *argv[]) {
int trunc_n = 0;
int gzip_output = 0;
int total=0;
int window_size = 0;

while (1) {
int option_index = 0;
optc = getopt_long(argc, argv, "df:t:o:q:l:zxng", single_long_options, &option_index);
optc = getopt_long(argc, argv, single_short_options, single_long_options, &option_index);

if (optc == -1)
break;
Expand Down Expand Up @@ -124,6 +151,14 @@ int single_main(int argc, char *argv[]) {
}
break;

case 'w':
window_size = atoi(optarg);
if (window_size < 1) {
fprintf(stderr, "Fixed window size must be >= 1\n");
return EXIT_FAILURE;
}
break;

case 'x':
no_fiveprime = 1;
break;
Expand Down Expand Up @@ -192,7 +227,7 @@ int single_main(int argc, char *argv[]) {

while ((l = kseq_read(fqrec)) >= 0) {

p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, no_fiveprime, trunc_n, debug);
p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, window_size, no_fiveprime, trunc_n, debug);
total++;

if (debug) printf("P1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);
Expand Down