Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PROGRAM_NAME = sickle
VERSION = 1.33
CC = gcc
CFLAGS = -Wall -pedantic -DVERSION=$(VERSION)
CFLAGS = -Wall -pedantic -std=c99 -DVERSION=$(VERSION)
DEBUG = -g
OPT = -O3
ARCHIVE = $(PROGRAM_NAME)_$(VERSION)
Expand Down
4 changes: 3 additions & 1 deletion src/sickle.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ exit(EXIT_SUCCESS); \
break;
/* end code drawn from system.h */

#define STDERR_OR_OUT(status) (status == EXIT_SUCCESS ? stdout : stderr)

typedef enum {
PHRED,
SANGER,
Expand Down Expand Up @@ -96,6 +98,6 @@ typedef struct __cutsites_ {
/* Function Prototypes */
int single_main (int argc, char *argv[]);
int paired_main (int argc, char *argv[]);
cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug);
cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int drop_n, int debug);

#endif /*SICKLE_H*/
14 changes: 9 additions & 5 deletions src/sliding.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ int get_quality_num (char qualchar, int qualtype, kseq_t *fqrec, int pos) {
}


cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug) {
cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int drop_n, int debug) {

int window_size = (int) (0.1 * fqrec->seq.l);
int i,j;
Expand Down Expand Up @@ -112,10 +112,14 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int
}


/* If truncate N option is selected, and sequence has Ns, then */
/* change 3' cut site to be the base before the first N */
if (trunc_n && ((npos = strstr(fqrec->seq.s, "N")) || (npos = strstr(fqrec->seq.s, "n")))) {
three_prime_cut = npos - fqrec->seq.s;
/* If truncate N option is selected, and sequence has Ns, then
* change 3' cut site to be the base before the first N.
* If drop N option is selected, omit the sequence. */
if ((npos = strstr(fqrec->seq.s, "N")) || (npos = strstr(fqrec->seq.s, "n"))) {
if (trunc_n)
three_prime_cut = npos - fqrec->seq.s;
else if (drop_n)
three_prime_cut = five_prime_cut = -1;
}

/* if cutting length is less than threshold then return -1 for both */
Expand Down
154 changes: 103 additions & 51 deletions src/trim_paired.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,62 +16,104 @@ __KSEQ_READ
int paired_qual_threshold = 20;
int paired_length_threshold = 20;

static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:xnNg";
static struct option paired_long_options[] = {
{"qual-type", required_argument, 0, 't'},
{"pe-file1", required_argument, 0, 'f'},
{"pe-file2", required_argument, 0, 'r'},
{"pe-combo", required_argument, 0, 'c'},
{"output-pe1", required_argument, 0, 'o'},
{"output-pe2", required_argument, 0, 'p'},
{"output-single", required_argument, 0, 's'},
{"output-combo", required_argument, 0, 'm'},
{"qual-threshold", required_argument, 0, 'q'},
{"length-threshold", required_argument, 0, 'l'},
{"no-fiveprime", no_argument, 0, 'x'},
{"truncate-n", no_argument, 0, 'n'},
{"gzip-output", no_argument, 0, 'g'},
{"output-combo-all", required_argument, 0, 'M'},
{"quiet", no_argument, 0, 'z'},
{ "qual-type", required_argument, 0, 't' },
{ "pe-file1", required_argument, 0, 'f' },
{ "pe-file2", required_argument, 0, 'r' },
{ "pe-combo", required_argument, 0, 'c' },
{ "output-pe1", required_argument, 0, 'o' },
{ "output-pe2", required_argument, 0, 'p' },
{ "output-single", required_argument, 0, 's' },
{ "output-combo", required_argument, 0, 'm' },
{ "qual-threshold", required_argument, 0, 'q' },
{ "length-threshold", required_argument, 0, 'l' },
{ "no-fiveprime", no_argument, 0, 'x' },
{ "truncate-n", no_argument, 0, 'n' },
{ "drop-n", no_argument, 0, 'N' },
{ "gzip-output", no_argument, 0, 'g' },
{ "output-combo-all", required_argument, 0, 'M' },
{ "quiet", no_argument, 0, 'z' },
{ "debug", no_argument, 0, 'd' },
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
};

void paired_usage (int status, char *msg) {
static const char *usage_format =
"\n"
"If you have separate files for forward and reverse reads:\n"
" Usage: %1$s pe [options] -f <paired-end forward fastq file>\n"
" %2$*3$s -r <paired-end reverse fastq file>\n"
" %2$*3$s -t <quality type>\n"
" %2$*3$s -o <trimmed PE forward file>\n"
" %2$*3$s -p <trimmed PE reverse file>\n"
" %2$*3$s -s <trimmed singles file>\n"
"\n"
"If you have one file with interleaved forward and reverse reads:\n"
" Usage: %1$s pe [options] -c <interleaved input file>\n"
" %2$*3$s -t <quality type>\n"
" %2$*3$s -m <interleaved trimmed paired-end output>\n"
" %2$*3$s -s <trimmed singles file>\n"
"\n"
"If you have one file with interleaved reads as input and you want\n"
"ONLY one interleaved file as output:\n"
" Usage: %1$s pe [options] -c <interleaved input file>\n"
" %2$*3$s -t <quality type>\n"
" %2$*3$s -M <interleaved trimmed output>\n"
"\n"
"Options:\n"
"\n"
"Paired-end separated reads\n"
"--------------------------\n"
"-f, --pe-file1 Input paired-end forward fastq file\n"
"-r, --pe-file2 Input paired-end reverse fastq file\n"
" (input files must have same number of records)\n"
"-o, --output-pe1 Output trimmed forward fastq file\n"
"-p, --output-pe2 Output trimmed reverse fastq file. Must use -s option.\n"
"\n"
"Paired-end interleaved reads\n"
"----------------------------\n"
"-c, --pe-combo Combined (interleaved) input paired-end fastq\n"
"-m, --output-combo Output combined (interleaved) paired-end fastq file.\n"
" Must use -s option.\n"
"-M, --output-combo-all Output combined (interleaved) paired-end fastq file\n"
" with any discarded read written to output file as a\n"
" single N. Cannot be used with the -s option.\n"
"\n"
"Global options\n"
"--------------\n"
"-t TYPE, --qual-type TYPE Type of quality values, one of:\n"
" solexa (CASAVA < 1.3)\n"
" illumina (CASAVA 1.3 to 1.7)\n"
" sanger (which is CASAVA >= 1.8)\n"
" (required)\n"
"-s FILE, --output-single FILE Output trimmed singles fastq file\n"
"-q #, --qual-threshold # Threshold for trimming based on average quality\n"
" in a window. Default %3$d.\n"
"-l #, --length-threshold # Threshold to keep a read based on length after\n"
" trimming. Default %4$d.\n"
"-x, --no-fiveprime Don't do five prime trimming.\n"
"-n, --truncate-n Truncate sequences at position of first N.\n"
"-N, --drop-n Discard sequences containing an N.\n"
"-g, --gzip-output Output gzipped files.\n"
"--quiet Do not output trimming info\n"
"--help Display this help and exit\n"
"--version Output version information and exit\n"
;

if (msg) fprintf( STDERR_OR_OUT(status), "%s\n", msg );

fprintf(
STDERR_OR_OUT(status),
usage_format,
PROGRAM_NAME,
"", strlen(PROGRAM_NAME) + 3, // +3 makes it possible to keep text visually aligned in the format string itself
paired_qual_threshold,
paired_length_threshold
);

fprintf(stderr, "\nIf you have separate files for forward and reverse reads:\n");
fprintf(stderr, "Usage: %s pe [options] -f <paired-end forward fastq file> -r <paired-end reverse fastq file> -t <quality type> -o <trimmed PE forward file> -p <trimmed PE reverse file> -s <trimmed singles file>\n\n", PROGRAM_NAME);
fprintf(stderr, "If you have one file with interleaved forward and reverse reads:\n");
fprintf(stderr, "Usage: %s pe [options] -c <interleaved input file> -t <quality type> -m <interleaved trimmed paired-end output> -s <trimmed singles file>\n\n\
If you have one file with interleaved reads as input and you want ONLY one interleaved file as output:\n\
Usage: %s pe [options] -c <interleaved input file> -t <quality type> -M <interleaved trimmed output>\n\n", PROGRAM_NAME, PROGRAM_NAME);
fprintf(stderr, "Options:\n\
Paired-end separated reads\n\
--------------------------\n\
-f, --pe-file1, Input paired-end forward fastq file (Input files must have same number of records)\n\
-r, --pe-file2, Input paired-end reverse fastq file\n\
-o, --output-pe1, Output trimmed forward fastq file\n\
-p, --output-pe2, Output trimmed reverse fastq file. Must use -s option.\n\n\
Paired-end interleaved reads\n\
----------------------------\n");
fprintf(stderr,"-c, --pe-combo, Combined (interleaved) input paired-end fastq\n\
-m, --output-combo, Output combined (interleaved) paired-end fastq file. Must use -s option.\n\
-M, --output-combo-all, Output combined (interleaved) paired-end fastq file with any discarded read written to output file as a single N. Cannot be used with the -s option.\n\n\
Global options\n\
--------------\n\
-t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n");
fprintf(stderr, "-s, --output-single, Output trimmed singles fastq file\n\
-q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\
-l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\
-x, --no-fiveprime, Don't do five prime trimming.\n\
-n, --truncate-n, Truncate sequences at position of first N.\n");


fprintf(stderr, "-g, --gzip-output, Output gzipped files.\n--quiet, do not output trimming info\n\
--help, display this help and exit\n\
--version, output version information and exit\n\n");

if (msg) fprintf(stderr, "%s\n\n", msg);
exit(status);
}

Expand Down Expand Up @@ -114,14 +156,15 @@ int paired_main(int argc, char *argv[]) {
int quiet = 0;
int no_fiveprime = 0;
int trunc_n = 0;
int drop_n = 0;
int gzip_output = 0;
int combo_all=0;
int combo_s=0;
int total=0;

while (1) {
int option_index = 0;
optc = getopt_long(argc, argv, "df:r:c:t:o:p:m:M:s:q:l:xng", paired_long_options, &option_index);
optc = getopt_long(argc, argv, paired_short_options, paired_long_options, &option_index);

if (optc == -1)
break;
Expand Down Expand Up @@ -206,6 +249,10 @@ int paired_main(int argc, char *argv[]) {
trunc_n = 1;
break;

case 'N':
drop_n = 1;
break;

case 'g':
gzip_output = 1;
break;
Expand Down Expand Up @@ -236,6 +283,11 @@ int paired_main(int argc, char *argv[]) {
paired_usage(EXIT_FAILURE, "****Error: Quality type is required.");
}

if (trunc_n && drop_n) {
fprintf(stderr, "****Error: cannot specify both --truncate-n and --drop-n\n\n");
return EXIT_FAILURE;
}

/* make sure minimum input filenames are specified */
if (!infn1 && !infnc) {
paired_usage(EXIT_FAILURE, "****Error: Must have either -f OR -c argument.");
Expand Down Expand Up @@ -374,8 +426,8 @@ int paired_main(int argc, char *argv[]) {
break;
}

p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug);
p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug);
p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, drop_n, debug);
p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, drop_n, debug);
total += 2;

if (debug) printf("p1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);
Expand Down
91 changes: 63 additions & 28 deletions src/trim_single.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,40 +15,65 @@ __KSEQ_READ
int single_qual_threshold = 20;
int single_length_threshold = 20;

static const char *single_short_options = "df:t:o:q:l:zxnNg";
static struct option single_long_options[] = {
{"fastq-file", required_argument, 0, 'f'},
{"output-file", required_argument, 0, 'o'},
{"qual-type", required_argument, 0, 't'},
{"qual-threshold", required_argument, 0, 'q'},
{"length-threshold", required_argument, 0, 'l'},
{"no-fiveprime", no_argument, 0, 'x'},
{"discard-n", no_argument, 0, 'n'},
{"gzip-output", no_argument, 0, 'g'},
{"quiet", no_argument, 0, 'z'},
{ "fastq-file", required_argument, 0, 'f' },
{ "output-file", required_argument, 0, 'o' },
{ "qual-type", required_argument, 0, 't' },
{ "qual-threshold", required_argument, 0, 'q' },
{ "length-threshold", required_argument, 0, 'l' },
{ "no-fiveprime", no_argument, 0, 'x' },
{ "trunc-n", no_argument, 0, 'n' },
{ "truncate-n", no_argument, 0, 'n' },
{ "drop-n", no_argument, 0, 'N' },
{ "gzip-output", no_argument, 0, 'g' },
{ "quiet", no_argument, 0, 'z' },
{ "debug", no_argument, 0, 'd' },
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
};

void single_usage(int status, char *msg) {
static const char *usage_format =
"\n"
"Usage: %1$s se [options] -f <fastq sequence file>\n"
" %2$*3$s -t <quality type>\n"
" %2$*3$s -o <trimmed fastq file>\n"
"\n"
"Options:\n"
"\n"
"-f FILE, --fastq-file FILE Input fastq file (required)\n"
"-o FILE, --output-file FILE Output trimmed fastq file (required)\n"
"-t TYPE, --qual-type TYPE Type of quality values, one of:\n"
" solexa (CASAVA < 1.3)\n"
" illumina (CASAVA 1.3 to 1.7)\n"
" sanger (which is CASAVA >= 1.8)\n"
" (required)\n"
"-q #, --qual-threshold # Threshold for trimming based on average quality\n"
" in a window. Default %3$d.\n"
"-l #, --length-threshold # Threshold to keep a read based on length after\n"
" trimming. Default %4$d.\n"
"-x, --no-fiveprime Don't do five prime trimming.\n"
"-n, --truncate-n Truncate sequences at position of first N.\n"
"-N, --drop-n Discard sequences containing an N.\n"
"-g, --gzip-output Output gzipped files.\n"
"--quiet Do not output trimming info\n"
"--help Display this help and exit\n"
"--version Output version information and exit\n"
;

if (msg) fprintf( STDERR_OR_OUT(status), "%s\n", msg );

fprintf(
STDERR_OR_OUT(status),
usage_format,
PROGRAM_NAME,
"", strlen(PROGRAM_NAME) + 3, // +3 makes it possible to keep text visually aligned in the format string itself
single_qual_threshold,
single_length_threshold
);

fprintf(stderr, "\nUsage: %s se [options] -f <fastq sequence file> -t <quality type> -o <trimmed fastq file>\n\
\n\
Options:\n\
-f, --fastq-file, Input fastq file (required)\n\
-t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n\
-o, --output-file, Output trimmed fastq file (required)\n", PROGRAM_NAME);

fprintf(stderr, "-q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\
-l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\
-x, --no-fiveprime, Don't do five prime trimming.\n\
-n, --trunc-n, Truncate sequences at position of first N.\n\
-g, --gzip-output, Output gzipped files.\n\
--quiet, Don't print out any trimming information\n\
--help, display this help and exit\n\
--version, output version information and exit\n\n");

if (msg) fprintf(stderr, "%s\n\n", msg);
exit(status);
}

Expand All @@ -71,12 +96,13 @@ int single_main(int argc, char *argv[]) {
int quiet = 0;
int no_fiveprime = 0;
int trunc_n = 0;
int drop_n = 0;
int gzip_output = 0;
int total=0;

while (1) {
int option_index = 0;
optc = getopt_long(argc, argv, "df:t:o:q:l:zxng", single_long_options, &option_index);
optc = getopt_long(argc, argv, single_short_options, single_long_options, &option_index);

if (optc == -1)
break;
Expand Down Expand Up @@ -132,6 +158,10 @@ int single_main(int argc, char *argv[]) {
trunc_n = 1;
break;

case 'N':
drop_n = 1;
break;

case 'g':
gzip_output = 1;
break;
Expand Down Expand Up @@ -187,12 +217,17 @@ int single_main(int argc, char *argv[]) {
}
}

if (trunc_n && drop_n) {
fprintf(stderr, "****Error: cannot specify both --truncate-n and --drop-n\n\n");
return EXIT_FAILURE;
}


fqrec = kseq_init(se);

while ((l = kseq_read(fqrec)) >= 0) {

p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, no_fiveprime, trunc_n, debug);
p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, no_fiveprime, trunc_n, drop_n, debug);
total++;

if (debug) printf("P1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);
Expand Down