From 50b58d1dedaa9a8573dad8531ba51e8570ed61c1 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Fri, 15 Aug 2014 11:35:31 -0700 Subject: [PATCH 1/7] Sync long options with short options so --debug works for -d --- src/trim_paired.c | 1 + src/trim_single.c | 1 + 2 files changed, 2 insertions(+) diff --git a/src/trim_paired.c b/src/trim_paired.c index dcf2eb1..0b71ecb 100644 --- a/src/trim_paired.c +++ b/src/trim_paired.c @@ -32,6 +32,7 @@ static struct option paired_long_options[] = { {"gzip-output", no_argument, 0, 'g'}, {"output-combo-all", required_argument, 0, 'M'}, {"quiet", no_argument, 0, 'z'}, + {"debug", no_argument, 0, 'd'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} diff --git a/src/trim_single.c b/src/trim_single.c index 2f59921..c3e94e6 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -25,6 +25,7 @@ static struct option single_long_options[] = { {"discard-n", no_argument, 0, 'n'}, {"gzip-output", no_argument, 0, 'g'}, {"quiet", no_argument, 0, 'z'}, + {"debug", no_argument, 0, 'd'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} From 3d240675816b6b801d2009de8dc83f775203d53a Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 25 Feb 2015 11:15:06 -0800 Subject: [PATCH 2/7] Adjust single-end -n long option name to match paired-end mode Standardizes on --truncate-n. This preserves the --trunc-n long name previously mentioned in the SE usage. It removes the --discard-n long name only ever used internally, but never documented. --- src/trim_single.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/trim_single.c b/src/trim_single.c index c3e94e6..efad0ec 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -22,7 +22,8 @@ static struct option single_long_options[] = { {"qual-threshold", required_argument, 0, 'q'}, {"length-threshold", required_argument, 0, 'l'}, {"no-fiveprime", no_argument, 0, 'x'}, - {"discard-n", no_argument, 0, 'n'}, + {"trunc-n", no_argument, 0, 'n'}, + {"truncate-n", no_argument, 0, 'n'}, {"gzip-output", no_argument, 0, 'g'}, {"quiet", no_argument, 0, 'z'}, {"debug", no_argument, 0, 'd'}, @@ -43,7 +44,7 @@ Options:\n\ fprintf(stderr, "-q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\ -l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\ -x, --no-fiveprime, Don't do five prime trimming.\n\ --n, --trunc-n, Truncate sequences at position of first N.\n\ +-n, --truncate-n, Truncate sequences at position of first N.\n\ -g, --gzip-output, Output gzipped files.\n\ --quiet, Don't print out any trimming information\n\ --help, display this help and exit\n\ From a40f45c48722ed174df458b9a6f083f68d60356a Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Fri, 15 Aug 2014 11:40:26 -0700 Subject: [PATCH 3/7] Move short option spec closer to long option spec so they don't drift This will help keep them in sync when updating options. --- src/trim_paired.c | 3 ++- src/trim_single.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/trim_paired.c b/src/trim_paired.c index 0b71ecb..df119a9 100644 --- a/src/trim_paired.c +++ b/src/trim_paired.c @@ -16,6 +16,7 @@ __KSEQ_READ int paired_qual_threshold = 20; int paired_length_threshold = 20; +static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:xng"; static struct option paired_long_options[] = { {"qual-type", required_argument, 0, 't'}, {"pe-file1", required_argument, 0, 'f'}, @@ -122,7 +123,7 @@ int paired_main(int argc, char *argv[]) { while (1) { int option_index = 0; - optc = getopt_long(argc, argv, "df:r:c:t:o:p:m:M:s:q:l:xng", paired_long_options, &option_index); + optc = getopt_long(argc, argv, paired_short_options, paired_long_options, &option_index); if (optc == -1) break; diff --git a/src/trim_single.c b/src/trim_single.c index efad0ec..f8a4dcb 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -15,6 +15,7 @@ __KSEQ_READ int single_qual_threshold = 20; int single_length_threshold = 20; +static const char *single_short_options = "df:t:o:q:l:zxng"; static struct option single_long_options[] = { {"fastq-file", required_argument, 0, 'f'}, {"output-file", required_argument, 0, 'o'}, @@ -78,7 +79,7 @@ int single_main(int argc, char *argv[]) { while (1) { int option_index = 0; - optc = getopt_long(argc, argv, "df:t:o:q:l:zxng", single_long_options, &option_index); + optc = getopt_long(argc, argv, single_short_options, single_long_options, &option_index); if (optc == -1) break; From 2de50e36c7c2b08daa60df49b0532b050ad85f3e Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 25 Feb 2015 11:17:44 -0800 Subject: [PATCH 4/7] Reformat long option structs for readability and maintainability No functional change, only whitespace (compare with git diff -w). --- src/trim_paired.c | 32 ++++++++++++++++---------------- src/trim_single.c | 22 +++++++++++----------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/trim_paired.c b/src/trim_paired.c index df119a9..8cba95c 100644 --- a/src/trim_paired.c +++ b/src/trim_paired.c @@ -18,22 +18,22 @@ int paired_length_threshold = 20; static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:xng"; static struct option paired_long_options[] = { - {"qual-type", required_argument, 0, 't'}, - {"pe-file1", required_argument, 0, 'f'}, - {"pe-file2", required_argument, 0, 'r'}, - {"pe-combo", required_argument, 0, 'c'}, - {"output-pe1", required_argument, 0, 'o'}, - {"output-pe2", required_argument, 0, 'p'}, - {"output-single", required_argument, 0, 's'}, - {"output-combo", required_argument, 0, 'm'}, - {"qual-threshold", required_argument, 0, 'q'}, - {"length-threshold", required_argument, 0, 'l'}, - {"no-fiveprime", no_argument, 0, 'x'}, - {"truncate-n", no_argument, 0, 'n'}, - {"gzip-output", no_argument, 0, 'g'}, - {"output-combo-all", required_argument, 0, 'M'}, - {"quiet", no_argument, 0, 'z'}, - {"debug", no_argument, 0, 'd'}, + { "qual-type", required_argument, 0, 't' }, + { "pe-file1", required_argument, 0, 'f' }, + { "pe-file2", required_argument, 0, 'r' }, + { "pe-combo", required_argument, 0, 'c' }, + { "output-pe1", required_argument, 0, 'o' }, + { "output-pe2", required_argument, 0, 'p' }, + { "output-single", required_argument, 0, 's' }, + { "output-combo", required_argument, 0, 'm' }, + { "qual-threshold", required_argument, 0, 'q' }, + { "length-threshold", required_argument, 0, 'l' }, + { "no-fiveprime", no_argument, 0, 'x' }, + { "truncate-n", no_argument, 0, 'n' }, + { "gzip-output", no_argument, 0, 'g' }, + { "output-combo-all", required_argument, 0, 'M' }, + { "quiet", no_argument, 0, 'z' }, + { "debug", no_argument, 0, 'd' }, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} diff --git a/src/trim_single.c b/src/trim_single.c index f8a4dcb..5d8def0 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -17,17 +17,17 @@ int single_length_threshold = 20; static const char *single_short_options = "df:t:o:q:l:zxng"; static struct option single_long_options[] = { - {"fastq-file", required_argument, 0, 'f'}, - {"output-file", required_argument, 0, 'o'}, - {"qual-type", required_argument, 0, 't'}, - {"qual-threshold", required_argument, 0, 'q'}, - {"length-threshold", required_argument, 0, 'l'}, - {"no-fiveprime", no_argument, 0, 'x'}, - {"trunc-n", no_argument, 0, 'n'}, - {"truncate-n", no_argument, 0, 'n'}, - {"gzip-output", no_argument, 0, 'g'}, - {"quiet", no_argument, 0, 'z'}, - {"debug", no_argument, 0, 'd'}, + { "fastq-file", required_argument, 0, 'f' }, + { "output-file", required_argument, 0, 'o' }, + { "qual-type", required_argument, 0, 't' }, + { "qual-threshold", required_argument, 0, 'q' }, + { "length-threshold", required_argument, 0, 'l' }, + { "no-fiveprime", no_argument, 0, 'x' }, + { "trunc-n", no_argument, 0, 'n' }, + { "truncate-n", no_argument, 0, 'n' }, + { "gzip-output", no_argument, 0, 'g' }, + { "quiet", no_argument, 0, 'z' }, + { "debug", no_argument, 0, 'd' }, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} From cd8b0f807a086bff7672d67375790745f5773358 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Fri, 15 Aug 2014 11:41:12 -0700 Subject: [PATCH 5/7] Overhaul usage formatting, both in code and when displayed The usage docs now indicate option arguments and are easier to read. If the usage was specifically requested with --help, then it is printed to stdout instead of stderr. This is useful for the common idiom of asking for help and piping to a pager like less or more (without redirecting stderr). --- src/sickle.h | 2 + src/trim_paired.c | 104 +++++++++++++++++++++++++++++++--------------- src/trim_single.c | 54 ++++++++++++++++-------- 3 files changed, 110 insertions(+), 50 deletions(-) diff --git a/src/sickle.h b/src/sickle.h index e7b679c..e78e667 100644 --- a/src/sickle.h +++ b/src/sickle.h @@ -61,6 +61,8 @@ exit(EXIT_SUCCESS); \ break; /* end code drawn from system.h */ +#define STDERR_OR_OUT(status) (status == EXIT_SUCCESS ? stdout : stderr) + typedef enum { PHRED, SANGER, diff --git a/src/trim_paired.c b/src/trim_paired.c index 8cba95c..119c3fd 100644 --- a/src/trim_paired.c +++ b/src/trim_paired.c @@ -40,40 +40,78 @@ static struct option paired_long_options[] = { }; void paired_usage (int status, char *msg) { + static const char *usage_format = + "\n" + "If you have separate files for forward and reverse reads:\n" + " Usage: %1$s pe [options] -f \n" + " %2$*3$s -r \n" + " %2$*3$s -t \n" + " %2$*3$s -o \n" + " %2$*3$s -p \n" + " %2$*3$s -s \n" + "\n" + "If you have one file with interleaved forward and reverse reads:\n" + " Usage: %1$s pe [options] -c \n" + " %2$*3$s -t \n" + " %2$*3$s -m \n" + " %2$*3$s -s \n" + "\n" + "If you have one file with interleaved reads as input and you want\n" + "ONLY one interleaved file as output:\n" + " Usage: %1$s pe [options] -c \n" + " %2$*3$s -t \n" + " %2$*3$s -M \n" + "\n" + "Options:\n" + "\n" + "Paired-end separated reads\n" + "--------------------------\n" + "-f, --pe-file1 Input paired-end forward fastq file\n" + "-r, --pe-file2 Input paired-end reverse fastq file\n" + " (input files must have same number of records)\n" + "-o, --output-pe1 Output trimmed forward fastq file\n" + "-p, --output-pe2 Output trimmed reverse fastq file. Must use -s option.\n" + "\n" + "Paired-end interleaved reads\n" + "----------------------------\n" + "-c, --pe-combo Combined (interleaved) input paired-end fastq\n" + "-m, --output-combo Output combined (interleaved) paired-end fastq file.\n" + " Must use -s option.\n" + "-M, --output-combo-all Output combined (interleaved) paired-end fastq file\n" + " with any discarded read written to output file as a\n" + " single N. Cannot be used with the -s option.\n" + "\n" + "Global options\n" + "--------------\n" + "-t TYPE, --qual-type TYPE Type of quality values, one of:\n" + " solexa (CASAVA < 1.3)\n" + " illumina (CASAVA 1.3 to 1.7)\n" + " sanger (which is CASAVA >= 1.8)\n" + " (required)\n" + "-s FILE, --output-single FILE Output trimmed singles fastq file\n" + "-q #, --qual-threshold # Threshold for trimming based on average quality\n" + " in a window. Default %3$d.\n" + "-l #, --length-threshold # Threshold to keep a read based on length after\n" + " trimming. Default %4$d.\n" + "-x, --no-fiveprime Don't do five prime trimming.\n" + "-n, --truncate-n Truncate sequences at position of first N.\n" + "-g, --gzip-output Output gzipped files.\n" + "--quiet Do not output trimming info\n" + "--help Display this help and exit\n" + "--version Output version information and exit\n" + ; + + if (msg) fprintf( STDERR_OR_OUT(status), "%s\n", msg ); + + fprintf( + STDERR_OR_OUT(status), + usage_format, + PROGRAM_NAME, + "", strlen(PROGRAM_NAME) + 3, // +3 makes it possible to keep text visually aligned in the format string itself + paired_qual_threshold, + paired_length_threshold + ); - fprintf(stderr, "\nIf you have separate files for forward and reverse reads:\n"); - fprintf(stderr, "Usage: %s pe [options] -f -r -t -o -p -s \n\n", PROGRAM_NAME); - fprintf(stderr, "If you have one file with interleaved forward and reverse reads:\n"); - fprintf(stderr, "Usage: %s pe [options] -c -t -m -s \n\n\ -If you have one file with interleaved reads as input and you want ONLY one interleaved file as output:\n\ -Usage: %s pe [options] -c -t -M \n\n", PROGRAM_NAME, PROGRAM_NAME); - fprintf(stderr, "Options:\n\ -Paired-end separated reads\n\ ---------------------------\n\ --f, --pe-file1, Input paired-end forward fastq file (Input files must have same number of records)\n\ --r, --pe-file2, Input paired-end reverse fastq file\n\ --o, --output-pe1, Output trimmed forward fastq file\n\ --p, --output-pe2, Output trimmed reverse fastq file. Must use -s option.\n\n\ -Paired-end interleaved reads\n\ -----------------------------\n"); - fprintf(stderr,"-c, --pe-combo, Combined (interleaved) input paired-end fastq\n\ --m, --output-combo, Output combined (interleaved) paired-end fastq file. Must use -s option.\n\ --M, --output-combo-all, Output combined (interleaved) paired-end fastq file with any discarded read written to output file as a single N. Cannot be used with the -s option.\n\n\ -Global options\n\ ---------------\n\ --t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n"); - fprintf(stderr, "-s, --output-single, Output trimmed singles fastq file\n\ --q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\ --l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\ --x, --no-fiveprime, Don't do five prime trimming.\n\ --n, --truncate-n, Truncate sequences at position of first N.\n"); - - - fprintf(stderr, "-g, --gzip-output, Output gzipped files.\n--quiet, do not output trimming info\n\ ---help, display this help and exit\n\ ---version, output version information and exit\n\n"); - - if (msg) fprintf(stderr, "%s\n\n", msg); exit(status); } diff --git a/src/trim_single.c b/src/trim_single.c index 5d8def0..3a15274 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -34,24 +34,44 @@ static struct option single_long_options[] = { }; void single_usage(int status, char *msg) { + static const char *usage_format = + "\n" + "Usage: %1$s se [options] -f \n" + " %2$*3$s -t \n" + " %2$*3$s -o \n" + "\n" + "Options:\n" + "\n" + "-f FILE, --fastq-file FILE Input fastq file (required)\n" + "-o FILE, --output-file FILE Output trimmed fastq file (required)\n" + "-t TYPE, --qual-type TYPE Type of quality values, one of:\n" + " solexa (CASAVA < 1.3)\n" + " illumina (CASAVA 1.3 to 1.7)\n" + " sanger (which is CASAVA >= 1.8)\n" + " (required)\n" + "-q #, --qual-threshold # Threshold for trimming based on average quality\n" + " in a window. Default %3$d.\n" + "-l #, --length-threshold # Threshold to keep a read based on length after\n" + " trimming. Default %4$d.\n" + "-x, --no-fiveprime Don't do five prime trimming.\n" + "-n, --truncate-n Truncate sequences at position of first N.\n" + "-g, --gzip-output Output gzipped files.\n" + "--quiet Do not output trimming info\n" + "--help Display this help and exit\n" + "--version Output version information and exit\n" + ; + + if (msg) fprintf( STDERR_OR_OUT(status), "%s\n", msg ); + + fprintf( + STDERR_OR_OUT(status), + usage_format, + PROGRAM_NAME, + "", strlen(PROGRAM_NAME) + 3, // +3 makes it possible to keep text visually aligned in the format string itself + single_qual_threshold, + single_length_threshold + ); - fprintf(stderr, "\nUsage: %s se [options] -f -t -o \n\ -\n\ -Options:\n\ --f, --fastq-file, Input fastq file (required)\n\ --t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n\ --o, --output-file, Output trimmed fastq file (required)\n", PROGRAM_NAME); - - fprintf(stderr, "-q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\ --l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\ --x, --no-fiveprime, Don't do five prime trimming.\n\ --n, --truncate-n, Truncate sequences at position of first N.\n\ --g, --gzip-output, Output gzipped files.\n\ ---quiet, Don't print out any trimming information\n\ ---help, display this help and exit\n\ ---version, output version information and exit\n\n"); - - if (msg) fprintf(stderr, "%s\n\n", msg); exit(status); } From beebcdbed7054cfae4caf343cedaf4e226585e58 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 8 Oct 2014 16:49:32 -0700 Subject: [PATCH 6/7] Declare C99 compat Silences warnings about //-style comments and long strings. Since kseq.h uses inline functions, a feature of C99, it's not useful pretending to be C89 compat (GCC's default). --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 24ce4ac..0326696 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ PROGRAM_NAME = sickle VERSION = 1.33 CC = gcc -CFLAGS = -Wall -pedantic -DVERSION=$(VERSION) +CFLAGS = -Wall -pedantic -std=c99 -DVERSION=$(VERSION) DEBUG = -g OPT = -O3 ARCHIVE = $(PROGRAM_NAME)_$(VERSION) From 6597584b5d952c63b4b9b7b7fcd805e4ccd97e88 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 24 Sep 2014 15:43:52 -0700 Subject: [PATCH 7/7] Optionally use a fixed window size instead of 0.1 of the read length A fixed window size provides a stricter control on average base qualities for datasets with a wide range of read lengths. --- src/sickle.h | 2 +- src/sliding.c | 11 ++++++----- src/trim_paired.c | 18 +++++++++++++++--- src/trim_single.c | 16 ++++++++++++++-- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/sickle.h b/src/sickle.h index e78e667..a6b9e1f 100644 --- a/src/sickle.h +++ b/src/sickle.h @@ -98,6 +98,6 @@ typedef struct __cutsites_ { /* Function Prototypes */ int single_main (int argc, char *argv[]); int paired_main (int argc, char *argv[]); -cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug); +cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int window_size, int no_fiveprime, int trunc_n, int debug); #endif /*SICKLE_H*/ diff --git a/src/sliding.c b/src/sliding.c index 7573106..d087dd6 100644 --- a/src/sliding.c +++ b/src/sliding.c @@ -32,9 +32,10 @@ int get_quality_num (char qualchar, int qualtype, kseq_t *fqrec, int pos) { } -cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug) { +cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int window_size, int no_fiveprime, int trunc_n, int debug) { - int window_size = (int) (0.1 * fqrec->seq.l); + if (!window_size) + window_size = (int) (0.1 * fqrec->seq.l); int i,j; int window_start=0; int window_total=0; @@ -53,9 +54,9 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int return (retvals); } - /* if the seq length is less then 10bp, */ + /* if the seq length is less then 10bp or the fixed window size is larger than the sequence length, */ /* then make the window size the length of the seq */ - if (window_size == 0) window_size = fqrec->seq.l; + if (window_size == 0 || window_size > fqrec->seq.l) window_size = fqrec->seq.l; for (i=0; iqual.s[i], qualtype, fqrec, i); @@ -65,7 +66,7 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int window_avg = (double)window_total / (double)window_size; - if (debug) printf ("no_fiveprime: %d, found 5prime: %d, window_avg: %f\n", no_fiveprime, found_five_prime, window_avg); + if (debug) printf ("no_fiveprime: %d, found 5prime: %d, window_avg: %f, window_size: %d\n", no_fiveprime, found_five_prime, window_avg, window_size); /* Finding the 5' cutoff */ /* Find when the average quality in the window goes above the threshold starting from the 5' end */ diff --git a/src/trim_paired.c b/src/trim_paired.c index 119c3fd..ede1072 100644 --- a/src/trim_paired.c +++ b/src/trim_paired.c @@ -16,7 +16,7 @@ __KSEQ_READ int paired_qual_threshold = 20; int paired_length_threshold = 20; -static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:xng"; +static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:w:xng"; static struct option paired_long_options[] = { { "qual-type", required_argument, 0, 't' }, { "pe-file1", required_argument, 0, 'f' }, @@ -28,6 +28,7 @@ static struct option paired_long_options[] = { { "output-combo", required_argument, 0, 'm' }, { "qual-threshold", required_argument, 0, 'q' }, { "length-threshold", required_argument, 0, 'l' }, + { "window", required_argument, 0, 'w' }, { "no-fiveprime", no_argument, 0, 'x' }, { "truncate-n", no_argument, 0, 'n' }, { "gzip-output", no_argument, 0, 'g' }, @@ -93,6 +94,8 @@ void paired_usage (int status, char *msg) { " in a window. Default %3$d.\n" "-l #, --length-threshold # Threshold to keep a read based on length after\n" " trimming. Default %4$d.\n" + "-w #, --window # Fixed window size to use. Default is a dynamic\n" + " window size of 0.1 of the read length.\n" "-x, --no-fiveprime Don't do five prime trimming.\n" "-n, --truncate-n Truncate sequences at position of first N.\n" "-g, --gzip-output Output gzipped files.\n" @@ -158,6 +161,7 @@ int paired_main(int argc, char *argv[]) { int combo_all=0; int combo_s=0; int total=0; + int window_size = 0; while (1) { int option_index = 0; @@ -238,6 +242,14 @@ int paired_main(int argc, char *argv[]) { } break; + case 'w': + window_size = atoi(optarg); + if (window_size < 1) { + fprintf(stderr, "Fixed window size must be >= 1\n"); + return EXIT_FAILURE; + } + break; + case 'x': no_fiveprime = 1; break; @@ -414,8 +426,8 @@ int paired_main(int argc, char *argv[]) { break; } - p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug); - p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug); + p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, window_size, no_fiveprime, trunc_n, debug); + p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, window_size, no_fiveprime, trunc_n, debug); total += 2; if (debug) printf("p1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut); diff --git a/src/trim_single.c b/src/trim_single.c index 3a15274..f0d6001 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -15,7 +15,7 @@ __KSEQ_READ int single_qual_threshold = 20; int single_length_threshold = 20; -static const char *single_short_options = "df:t:o:q:l:zxng"; +static const char *single_short_options = "df:t:o:q:l:w:zxng"; static struct option single_long_options[] = { { "fastq-file", required_argument, 0, 'f' }, { "output-file", required_argument, 0, 'o' }, @@ -23,6 +23,7 @@ static struct option single_long_options[] = { { "qual-threshold", required_argument, 0, 'q' }, { "length-threshold", required_argument, 0, 'l' }, { "no-fiveprime", no_argument, 0, 'x' }, + { "window", required_argument, 0, 'w' }, { "trunc-n", no_argument, 0, 'n' }, { "truncate-n", no_argument, 0, 'n' }, { "gzip-output", no_argument, 0, 'g' }, @@ -53,6 +54,8 @@ void single_usage(int status, char *msg) { " in a window. Default %3$d.\n" "-l #, --length-threshold # Threshold to keep a read based on length after\n" " trimming. Default %4$d.\n" + "-w #, --window # Fixed window size to use. Default is a dynamic\n" + " window size of 0.1 of the read length.\n" "-x, --no-fiveprime Don't do five prime trimming.\n" "-n, --truncate-n Truncate sequences at position of first N.\n" "-g, --gzip-output Output gzipped files.\n" @@ -96,6 +99,7 @@ int single_main(int argc, char *argv[]) { int trunc_n = 0; int gzip_output = 0; int total=0; + int window_size = 0; while (1) { int option_index = 0; @@ -147,6 +151,14 @@ int single_main(int argc, char *argv[]) { } break; + case 'w': + window_size = atoi(optarg); + if (window_size < 1) { + fprintf(stderr, "Fixed window size must be >= 1\n"); + return EXIT_FAILURE; + } + break; + case 'x': no_fiveprime = 1; break; @@ -215,7 +227,7 @@ int single_main(int argc, char *argv[]) { while ((l = kseq_read(fqrec)) >= 0) { - p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, no_fiveprime, trunc_n, debug); + p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, window_size, no_fiveprime, trunc_n, debug); total++; if (debug) printf("P1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);