From 50b58d1dedaa9a8573dad8531ba51e8570ed61c1 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Fri, 15 Aug 2014 11:35:31 -0700 Subject: [PATCH 1/7] Sync long options with short options so --debug works for -d --- src/trim_paired.c | 1 + src/trim_single.c | 1 + 2 files changed, 2 insertions(+) diff --git a/src/trim_paired.c b/src/trim_paired.c index dcf2eb1..0b71ecb 100644 --- a/src/trim_paired.c +++ b/src/trim_paired.c @@ -32,6 +32,7 @@ static struct option paired_long_options[] = { {"gzip-output", no_argument, 0, 'g'}, {"output-combo-all", required_argument, 0, 'M'}, {"quiet", no_argument, 0, 'z'}, + {"debug", no_argument, 0, 'd'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} diff --git a/src/trim_single.c b/src/trim_single.c index 2f59921..c3e94e6 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -25,6 +25,7 @@ static struct option single_long_options[] = { {"discard-n", no_argument, 0, 'n'}, {"gzip-output", no_argument, 0, 'g'}, {"quiet", no_argument, 0, 'z'}, + {"debug", no_argument, 0, 'd'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} From 3d240675816b6b801d2009de8dc83f775203d53a Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 25 Feb 2015 11:15:06 -0800 Subject: [PATCH 2/7] Adjust single-end -n long option name to match paired-end mode Standardizes on --truncate-n. This preserves the --trunc-n long name previously mentioned in the SE usage. It removes the --discard-n long name only ever used internally, but never documented. 
--- src/trim_single.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/trim_single.c b/src/trim_single.c index c3e94e6..efad0ec 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -22,7 +22,8 @@ static struct option single_long_options[] = { {"qual-threshold", required_argument, 0, 'q'}, {"length-threshold", required_argument, 0, 'l'}, {"no-fiveprime", no_argument, 0, 'x'}, - {"discard-n", no_argument, 0, 'n'}, + {"trunc-n", no_argument, 0, 'n'}, + {"truncate-n", no_argument, 0, 'n'}, {"gzip-output", no_argument, 0, 'g'}, {"quiet", no_argument, 0, 'z'}, {"debug", no_argument, 0, 'd'}, @@ -43,7 +44,7 @@ Options:\n\ fprintf(stderr, "-q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\ -l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\ -x, --no-fiveprime, Don't do five prime trimming.\n\ --n, --trunc-n, Truncate sequences at position of first N.\n\ +-n, --truncate-n, Truncate sequences at position of first N.\n\ -g, --gzip-output, Output gzipped files.\n\ --quiet, Don't print out any trimming information\n\ --help, display this help and exit\n\ From a40f45c48722ed174df458b9a6f083f68d60356a Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Fri, 15 Aug 2014 11:40:26 -0700 Subject: [PATCH 3/7] Move short option spec closer to long option spec so they don't drift This will help keep them in sync when updating options. 
--- src/trim_paired.c | 3 ++- src/trim_single.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/trim_paired.c b/src/trim_paired.c index 0b71ecb..df119a9 100644 --- a/src/trim_paired.c +++ b/src/trim_paired.c @@ -16,6 +16,7 @@ __KSEQ_READ int paired_qual_threshold = 20; int paired_length_threshold = 20; +static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:xng"; static struct option paired_long_options[] = { {"qual-type", required_argument, 0, 't'}, {"pe-file1", required_argument, 0, 'f'}, @@ -122,7 +123,7 @@ int paired_main(int argc, char *argv[]) { while (1) { int option_index = 0; - optc = getopt_long(argc, argv, "df:r:c:t:o:p:m:M:s:q:l:xng", paired_long_options, &option_index); + optc = getopt_long(argc, argv, paired_short_options, paired_long_options, &option_index); if (optc == -1) break; diff --git a/src/trim_single.c b/src/trim_single.c index efad0ec..f8a4dcb 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -15,6 +15,7 @@ __KSEQ_READ int single_qual_threshold = 20; int single_length_threshold = 20; +static const char *single_short_options = "df:t:o:q:l:zxng"; static struct option single_long_options[] = { {"fastq-file", required_argument, 0, 'f'}, {"output-file", required_argument, 0, 'o'}, @@ -78,7 +79,7 @@ int single_main(int argc, char *argv[]) { while (1) { int option_index = 0; - optc = getopt_long(argc, argv, "df:t:o:q:l:zxng", single_long_options, &option_index); + optc = getopt_long(argc, argv, single_short_options, single_long_options, &option_index); if (optc == -1) break; From 2de50e36c7c2b08daa60df49b0532b050ad85f3e Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 25 Feb 2015 11:17:44 -0800 Subject: [PATCH 4/7] Reformat long option structs for readability and maintainability No functional change, only whitespace (compare with git diff -w). 
--- src/trim_paired.c | 32 ++++++++++++++++---------------- src/trim_single.c | 22 +++++++++++----------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/trim_paired.c b/src/trim_paired.c index df119a9..8cba95c 100644 --- a/src/trim_paired.c +++ b/src/trim_paired.c @@ -18,22 +18,22 @@ int paired_length_threshold = 20; static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:xng"; static struct option paired_long_options[] = { - {"qual-type", required_argument, 0, 't'}, - {"pe-file1", required_argument, 0, 'f'}, - {"pe-file2", required_argument, 0, 'r'}, - {"pe-combo", required_argument, 0, 'c'}, - {"output-pe1", required_argument, 0, 'o'}, - {"output-pe2", required_argument, 0, 'p'}, - {"output-single", required_argument, 0, 's'}, - {"output-combo", required_argument, 0, 'm'}, - {"qual-threshold", required_argument, 0, 'q'}, - {"length-threshold", required_argument, 0, 'l'}, - {"no-fiveprime", no_argument, 0, 'x'}, - {"truncate-n", no_argument, 0, 'n'}, - {"gzip-output", no_argument, 0, 'g'}, - {"output-combo-all", required_argument, 0, 'M'}, - {"quiet", no_argument, 0, 'z'}, - {"debug", no_argument, 0, 'd'}, + { "qual-type", required_argument, 0, 't' }, + { "pe-file1", required_argument, 0, 'f' }, + { "pe-file2", required_argument, 0, 'r' }, + { "pe-combo", required_argument, 0, 'c' }, + { "output-pe1", required_argument, 0, 'o' }, + { "output-pe2", required_argument, 0, 'p' }, + { "output-single", required_argument, 0, 's' }, + { "output-combo", required_argument, 0, 'm' }, + { "qual-threshold", required_argument, 0, 'q' }, + { "length-threshold", required_argument, 0, 'l' }, + { "no-fiveprime", no_argument, 0, 'x' }, + { "truncate-n", no_argument, 0, 'n' }, + { "gzip-output", no_argument, 0, 'g' }, + { "output-combo-all", required_argument, 0, 'M' }, + { "quiet", no_argument, 0, 'z' }, + { "debug", no_argument, 0, 'd' }, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} diff --git a/src/trim_single.c 
b/src/trim_single.c index f8a4dcb..5d8def0 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -17,17 +17,17 @@ int single_length_threshold = 20; static const char *single_short_options = "df:t:o:q:l:zxng"; static struct option single_long_options[] = { - {"fastq-file", required_argument, 0, 'f'}, - {"output-file", required_argument, 0, 'o'}, - {"qual-type", required_argument, 0, 't'}, - {"qual-threshold", required_argument, 0, 'q'}, - {"length-threshold", required_argument, 0, 'l'}, - {"no-fiveprime", no_argument, 0, 'x'}, - {"trunc-n", no_argument, 0, 'n'}, - {"truncate-n", no_argument, 0, 'n'}, - {"gzip-output", no_argument, 0, 'g'}, - {"quiet", no_argument, 0, 'z'}, - {"debug", no_argument, 0, 'd'}, + { "fastq-file", required_argument, 0, 'f' }, + { "output-file", required_argument, 0, 'o' }, + { "qual-type", required_argument, 0, 't' }, + { "qual-threshold", required_argument, 0, 'q' }, + { "length-threshold", required_argument, 0, 'l' }, + { "no-fiveprime", no_argument, 0, 'x' }, + { "trunc-n", no_argument, 0, 'n' }, + { "truncate-n", no_argument, 0, 'n' }, + { "gzip-output", no_argument, 0, 'g' }, + { "quiet", no_argument, 0, 'z' }, + { "debug", no_argument, 0, 'd' }, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} From cd8b0f807a086bff7672d67375790745f5773358 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Fri, 15 Aug 2014 11:41:12 -0700 Subject: [PATCH 5/7] Overhaul usage formatting, both in code and when displayed The usage docs now indicate option arguments and are easier to read. If the usage was specifically requested with --help, then it is printed to stdout instead of stderr. This is useful for the common idiom of asking for help and piping to a pager like less or more (without redirecting stderr). 
---
Review notes (below the three-dash line, so not part of the commit message):
 * Both usage formats use numbered conversions. Argument 3 is the pad width
   consumed by "%2$*3$s", so the thresholds are arguments 4 and 5. The
   quality default is now "%5$d" and the length default stays "%4$d";
   previously "Default %3$d" printed the pad width.
 * The pad width is cast to int: a '*' width must be an int, strlen()
   returns size_t.
 * STDERR_OR_OUT() parenthesizes its argument.
 * NOTE(review): the operands after -f/-r/-t/-o/-p/-s/-c/-m/-M in the
   "Usage:" lines look like they lost "<...>" placeholders -- confirm
   against the original commit.
 * NOTE(review): "%n$" conversions are POSIX, not ISO C, so -pedantic may
   warn about them.

 src/sickle.h      |   2 +
 src/trim_paired.c | 104 +++++++++++++++++++++++++++++++++----------------
 src/trim_single.c |  54 +++++++++++++++++--------
 3 files changed, 110 insertions(+), 50 deletions(-)

diff --git a/src/sickle.h b/src/sickle.h
index e7b679c..e78e667 100644
--- a/src/sickle.h
+++ b/src/sickle.h
@@ -61,6 +61,8 @@
     exit(EXIT_SUCCESS); \
     break;
 /* end code drawn from system.h */
+#define STDERR_OR_OUT(status) ((status) == EXIT_SUCCESS ? stdout : stderr)
+
 typedef enum {
   PHRED,
   SANGER,
diff --git a/src/trim_paired.c b/src/trim_paired.c
index 8cba95c..119c3fd 100644
--- a/src/trim_paired.c
+++ b/src/trim_paired.c
@@ -40,40 +40,78 @@ static struct option paired_long_options[] = {
 };
 
 void paired_usage (int status, char *msg) {
+    static const char *usage_format =
+        "\n"
+        "If you have separate files for forward and reverse reads:\n"
+        " Usage: %1$s pe [options] -f \n"
+        " %2$*3$s -r \n"
+        " %2$*3$s -t \n"
+        " %2$*3$s -o \n"
+        " %2$*3$s -p \n"
+        " %2$*3$s -s \n"
+        "\n"
+        "If you have one file with interleaved forward and reverse reads:\n"
+        " Usage: %1$s pe [options] -c \n"
+        " %2$*3$s -t \n"
+        " %2$*3$s -m \n"
+        " %2$*3$s -s \n"
+        "\n"
+        "If you have one file with interleaved reads as input and you want\n"
+        "ONLY one interleaved file as output:\n"
+        " Usage: %1$s pe [options] -c \n"
+        " %2$*3$s -t \n"
+        " %2$*3$s -M \n"
+        "\n"
+        "Options:\n"
+        "\n"
+        "Paired-end separated reads\n"
+        "--------------------------\n"
+        "-f, --pe-file1 Input paired-end forward fastq file\n"
+        "-r, --pe-file2 Input paired-end reverse fastq file\n"
+        " (input files must have same number of records)\n"
+        "-o, --output-pe1 Output trimmed forward fastq file\n"
+        "-p, --output-pe2 Output trimmed reverse fastq file. Must use -s option.\n"
+        "\n"
+        "Paired-end interleaved reads\n"
+        "----------------------------\n"
+        "-c, --pe-combo Combined (interleaved) input paired-end fastq\n"
+        "-m, --output-combo Output combined (interleaved) paired-end fastq file.\n"
+        " Must use -s option.\n"
+        "-M, --output-combo-all Output combined (interleaved) paired-end fastq file\n"
+        " with any discarded read written to output file as a\n"
+        " single N. Cannot be used with the -s option.\n"
+        "\n"
+        "Global options\n"
+        "--------------\n"
+        "-t TYPE, --qual-type TYPE Type of quality values, one of:\n"
+        " solexa (CASAVA < 1.3)\n"
+        " illumina (CASAVA 1.3 to 1.7)\n"
+        " sanger (which is CASAVA >= 1.8)\n"
+        " (required)\n"
+        "-s FILE, --output-single FILE Output trimmed singles fastq file\n"
+        "-q #, --qual-threshold # Threshold for trimming based on average quality\n"
+        " in a window. Default %5$d.\n"
+        "-l #, --length-threshold # Threshold to keep a read based on length after\n"
+        " trimming. Default %4$d.\n"
+        "-x, --no-fiveprime Don't do five prime trimming.\n"
+        "-n, --truncate-n Truncate sequences at position of first N.\n"
+        "-g, --gzip-output Output gzipped files.\n"
+        "--quiet Do not output trimming info\n"
+        "--help Display this help and exit\n"
+        "--version Output version information and exit\n"
+    ;
+
+    if (msg) fprintf( STDERR_OR_OUT(status), "%s\n", msg );
+
+    fprintf(
+        STDERR_OR_OUT(status),
+        usage_format,
+        PROGRAM_NAME,                        // %1$s
+        "", (int) strlen(PROGRAM_NAME) + 3,  // %2$*3$s: blank pad; +3 makes it possible to keep text visually aligned in the format string itself; '*' width must be int
+        paired_length_threshold,             // %4$d
+        paired_qual_threshold                // %5$d
+    );
 
-    fprintf(stderr, "\nIf you have separate files for forward and reverse reads:\n");
-    fprintf(stderr, "Usage: %s pe [options] -f -r -t -o -p -s \n\n", PROGRAM_NAME);
-    fprintf(stderr, "If you have one file with interleaved forward and reverse reads:\n");
-    fprintf(stderr, "Usage: %s pe [options] -c -t -m -s \n\n\
-If you have one file with interleaved reads as input and you want ONLY one interleaved file as output:\n\
-Usage: %s pe [options] -c -t -M \n\n", PROGRAM_NAME, PROGRAM_NAME);
-    fprintf(stderr, "Options:\n\
-Paired-end separated reads\n\
---------------------------\n\
--f, --pe-file1, Input paired-end forward fastq file (Input files must have same number of records)\n\
--r, --pe-file2, Input paired-end reverse fastq file\n\
--o, --output-pe1, Output trimmed forward fastq file\n\
--p, --output-pe2, Output trimmed reverse fastq file. Must use -s option.\n\n\
-Paired-end interleaved reads\n\
-----------------------------\n");
-    fprintf(stderr,"-c, --pe-combo, Combined (interleaved) input paired-end fastq\n\
--m, --output-combo, Output combined (interleaved) paired-end fastq file. Must use -s option.\n\
--M, --output-combo-all, Output combined (interleaved) paired-end fastq file with any discarded read written to output file as a single N. Cannot be used with the -s option.\n\n\
-Global options\n\
---------------\n\
--t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n");
-    fprintf(stderr, "-s, --output-single, Output trimmed singles fastq file\n\
--q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\
--l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\
--x, --no-fiveprime, Don't do five prime trimming.\n\
--n, --truncate-n, Truncate sequences at position of first N.\n");
-
-
-    fprintf(stderr, "-g, --gzip-output, Output gzipped files.\n--quiet, do not output trimming info\n\
---help, display this help and exit\n\
---version, output version information and exit\n\n");
-
-    if (msg) fprintf(stderr, "%s\n\n", msg);
     exit(status);
 }
 
diff --git a/src/trim_single.c b/src/trim_single.c
index 5d8def0..3a15274 100644
--- a/src/trim_single.c
+++ b/src/trim_single.c
@@ -34,24 +34,44 @@ static struct option single_long_options[] = {
 };
 
 void single_usage(int status, char *msg) {
+    static const char *usage_format =
+        "\n"
+        "Usage: %1$s se [options] -f \n"
+        " %2$*3$s -t \n"
+        " %2$*3$s -o \n"
+        "\n"
+        "Options:\n"
+        "\n"
+        "-f FILE, --fastq-file FILE Input fastq file (required)\n"
+        "-o FILE, --output-file FILE Output trimmed fastq file (required)\n"
+        "-t TYPE, --qual-type TYPE Type of quality values, one of:\n"
+        " solexa (CASAVA < 1.3)\n"
+        " illumina (CASAVA 1.3 to 1.7)\n"
+        " sanger (which is CASAVA >= 1.8)\n"
+        " (required)\n"
+        "-q #, --qual-threshold # Threshold for trimming based on average quality\n"
+        " in a window. Default %5$d.\n"
+        "-l #, --length-threshold # Threshold to keep a read based on length after\n"
+        " trimming. Default %4$d.\n"
+        "-x, --no-fiveprime Don't do five prime trimming.\n"
+        "-n, --truncate-n Truncate sequences at position of first N.\n"
+        "-g, --gzip-output Output gzipped files.\n"
+        "--quiet Do not output trimming info\n"
+        "--help Display this help and exit\n"
+        "--version Output version information and exit\n"
+    ;
+
+    if (msg) fprintf( STDERR_OR_OUT(status), "%s\n", msg );
+
+    fprintf(
+        STDERR_OR_OUT(status),
+        usage_format,
+        PROGRAM_NAME,                        // %1$s
+        "", (int) strlen(PROGRAM_NAME) + 3,  // %2$*3$s: blank pad; +3 makes it possible to keep text visually aligned in the format string itself; '*' width must be int
+        single_length_threshold,             // %4$d
+        single_qual_threshold                // %5$d
+    );
 
-    fprintf(stderr, "\nUsage: %s se [options] -f -t -o \n\
-\n\
-Options:\n\
--f, --fastq-file, Input fastq file (required)\n\
--t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n\
--o, --output-file, Output trimmed fastq file (required)\n", PROGRAM_NAME);
-
-    fprintf(stderr, "-q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\
--l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\
--x, --no-fiveprime, Don't do five prime trimming.\n\
--n, --truncate-n, Truncate sequences at position of first N.\n\
--g, --gzip-output, Output gzipped files.\n\
---quiet, Don't print out any trimming information\n\
---help, display this help and exit\n\
---version, output version information and exit\n\n");
-
-    if (msg) fprintf(stderr, "%s\n\n", msg);
     exit(status);
 }
 

From beebcdbed7054cfae4caf343cedaf4e226585e58 Mon Sep 17 00:00:00 2001
From: Thomas Sibley
Date: Wed, 8 Oct 2014 16:49:32 -0700
Subject: [PATCH 6/7] Declare C99 compat

Silences warnings about //-style comments and long strings. Since kseq.h
uses inline functions, a feature of C99, it's not useful pretending to be
C89 compat (GCC's default).
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 24ce4ac..0326696 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ PROGRAM_NAME = sickle VERSION = 1.33 CC = gcc -CFLAGS = -Wall -pedantic -DVERSION=$(VERSION) +CFLAGS = -Wall -pedantic -std=c99 -DVERSION=$(VERSION) DEBUG = -g OPT = -O3 ARCHIVE = $(PROGRAM_NAME)_$(VERSION) From 01cc09fc18271aafcad380ef89c3d0208cf22abc Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 14 Aug 2014 12:22:37 -0700 Subject: [PATCH 7/7] Add a --drop-n / -N option to drop any sequence containing an N This is a stricter version of --truncate-n which may use sequence fragments up until the first N provided they pass the length filter. --- src/sickle.h | 2 +- src/sliding.c | 14 +++++++++----- src/trim_paired.c | 18 +++++++++++++++--- src/trim_single.c | 16 ++++++++++++++-- 4 files changed, 39 insertions(+), 11 deletions(-) diff --git a/src/sickle.h b/src/sickle.h index e78e667..c697b92 100644 --- a/src/sickle.h +++ b/src/sickle.h @@ -98,6 +98,6 @@ typedef struct __cutsites_ { /* Function Prototypes */ int single_main (int argc, char *argv[]); int paired_main (int argc, char *argv[]); -cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug); +cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int drop_n, int debug); #endif /*SICKLE_H*/ diff --git a/src/sliding.c b/src/sliding.c index 7573106..5a8193c 100644 --- a/src/sliding.c +++ b/src/sliding.c @@ -32,7 +32,7 @@ int get_quality_num (char qualchar, int qualtype, kseq_t *fqrec, int pos) { } -cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug) { +cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int drop_n, int 
debug) { int window_size = (int) (0.1 * fqrec->seq.l); int i,j; @@ -112,10 +112,14 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int } - /* If truncate N option is selected, and sequence has Ns, then */ - /* change 3' cut site to be the base before the first N */ - if (trunc_n && ((npos = strstr(fqrec->seq.s, "N")) || (npos = strstr(fqrec->seq.s, "n")))) { - three_prime_cut = npos - fqrec->seq.s; + /* If truncate N option is selected, and sequence has Ns, then + * change 3' cut site to be the base before the first N. + * If drop N option is selected, omit the sequence. */ + if ((npos = strstr(fqrec->seq.s, "N")) || (npos = strstr(fqrec->seq.s, "n"))) { + if (trunc_n) + three_prime_cut = npos - fqrec->seq.s; + else if (drop_n) + three_prime_cut = five_prime_cut = -1; } /* if cutting length is less than threshold then return -1 for both */ diff --git a/src/trim_paired.c b/src/trim_paired.c index 119c3fd..e015a2e 100644 --- a/src/trim_paired.c +++ b/src/trim_paired.c @@ -16,7 +16,7 @@ __KSEQ_READ int paired_qual_threshold = 20; int paired_length_threshold = 20; -static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:xng"; +static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:xnNg"; static struct option paired_long_options[] = { { "qual-type", required_argument, 0, 't' }, { "pe-file1", required_argument, 0, 'f' }, @@ -30,6 +30,7 @@ static struct option paired_long_options[] = { { "length-threshold", required_argument, 0, 'l' }, { "no-fiveprime", no_argument, 0, 'x' }, { "truncate-n", no_argument, 0, 'n' }, + { "drop-n", no_argument, 0, 'N' }, { "gzip-output", no_argument, 0, 'g' }, { "output-combo-all", required_argument, 0, 'M' }, { "quiet", no_argument, 0, 'z' }, @@ -95,6 +96,7 @@ void paired_usage (int status, char *msg) { " trimming. 
Default %4$d.\n" "-x, --no-fiveprime Don't do five prime trimming.\n" "-n, --truncate-n Truncate sequences at position of first N.\n" + "-N, --drop-n Discard sequences containing an N.\n" "-g, --gzip-output Output gzipped files.\n" "--quiet Do not output trimming info\n" "--help Display this help and exit\n" @@ -154,6 +156,7 @@ int paired_main(int argc, char *argv[]) { int quiet = 0; int no_fiveprime = 0; int trunc_n = 0; + int drop_n = 0; int gzip_output = 0; int combo_all=0; int combo_s=0; @@ -246,6 +249,10 @@ int paired_main(int argc, char *argv[]) { trunc_n = 1; break; + case 'N': + drop_n = 1; + break; + case 'g': gzip_output = 1; break; @@ -276,6 +283,11 @@ int paired_main(int argc, char *argv[]) { paired_usage(EXIT_FAILURE, "****Error: Quality type is required."); } + if (trunc_n && drop_n) { + fprintf(stderr, "****Error: cannot specify both --truncate-n and --drop-n\n\n"); + return EXIT_FAILURE; + } + /* make sure minimum input filenames are specified */ if (!infn1 && !infnc) { paired_usage(EXIT_FAILURE, "****Error: Must have either -f OR -c argument."); @@ -414,8 +426,8 @@ int paired_main(int argc, char *argv[]) { break; } - p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug); - p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug); + p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, drop_n, debug); + p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, drop_n, debug); total += 2; if (debug) printf("p1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut); diff --git a/src/trim_single.c b/src/trim_single.c index 3a15274..d47feab 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -15,7 +15,7 @@ __KSEQ_READ int single_qual_threshold = 20; int single_length_threshold = 20; -static const char 
*single_short_options = "df:t:o:q:l:zxng"; +static const char *single_short_options = "df:t:o:q:l:zxnNg"; static struct option single_long_options[] = { { "fastq-file", required_argument, 0, 'f' }, { "output-file", required_argument, 0, 'o' }, @@ -25,6 +25,7 @@ static struct option single_long_options[] = { { "no-fiveprime", no_argument, 0, 'x' }, { "trunc-n", no_argument, 0, 'n' }, { "truncate-n", no_argument, 0, 'n' }, + { "drop-n", no_argument, 0, 'N' }, { "gzip-output", no_argument, 0, 'g' }, { "quiet", no_argument, 0, 'z' }, { "debug", no_argument, 0, 'd' }, @@ -55,6 +56,7 @@ void single_usage(int status, char *msg) { " trimming. Default %4$d.\n" "-x, --no-fiveprime Don't do five prime trimming.\n" "-n, --truncate-n Truncate sequences at position of first N.\n" + "-N, --drop-n Discard sequences containing an N.\n" "-g, --gzip-output Output gzipped files.\n" "--quiet Do not output trimming info\n" "--help Display this help and exit\n" @@ -94,6 +96,7 @@ int single_main(int argc, char *argv[]) { int quiet = 0; int no_fiveprime = 0; int trunc_n = 0; + int drop_n = 0; int gzip_output = 0; int total=0; @@ -155,6 +158,10 @@ int single_main(int argc, char *argv[]) { trunc_n = 1; break; + case 'N': + drop_n = 1; + break; + case 'g': gzip_output = 1; break; @@ -210,12 +217,17 @@ int single_main(int argc, char *argv[]) { } } + if (trunc_n && drop_n) { + fprintf(stderr, "****Error: cannot specify both --truncate-n and --drop-n\n\n"); + return EXIT_FAILURE; + } + fqrec = kseq_init(se); while ((l = kseq_read(fqrec)) >= 0) { - p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, no_fiveprime, trunc_n, debug); + p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, no_fiveprime, trunc_n, drop_n, debug); total++; if (debug) printf("P1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);