diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..135df85
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+# ignore C object files
+*.o
+# ignore executables
+sabre
+sabre-dev
+metrics
+# ignore vim swap files
+*.swp
+*.gz
+/tmp
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 2ff3141..0000000
--- a/Makefile
+++ /dev/null
@@ -1,41 +0,0 @@
-PROGRAM_NAME = sabre
-VERSION = 1.00
-CC = gcc
-CFLAGS = -Wall -pedantic -DVERSION=$(VERSION)
-DEBUG = -g
-OPT = -O3
-ARCHIVE = $(PROGRAM_NAME)_$(VERSION)
-LDFLAGS = -lz
-SDIR = src
-
-.PHONY: clean default build distclean dist debug
-
-default: build
-
-barcode.o: $(SDIR)/barcode.c
-	$(CC) $(CFLAGS) $(OPT) -c $(SDIR)/$*.c
-
-demulti_single.o: $(SDIR)/demulti_single.c $(SDIR)/sabre.h $(SDIR)/kseq.h
-	$(CC) $(CFLAGS) $(OPT) -c $(SDIR)/$*.c
-
-demulti_paired.o: $(SDIR)/demulti_paired.c $(SDIR)/sabre.h $(SDIR)/kseq.h
-	$(CC) $(CFLAGS) $(OPT) -c $(SDIR)/$*.c
-
-sabre.o: $(SDIR)/sabre.c $(SDIR)/sabre.h
-	$(CC) $(CFLAGS) $(OPT) -c $(SDIR)/$*.c
-
-clean:
-	rm -rf *.o $(SDIR)/*.gch ./sabre
-
-distclean: clean
-	rm -rf *.tar.gz
-
-dist:
-	tar -zcf $(ARCHIVE).tar.gz src Makefile
-
-build: barcode.o demulti_single.o demulti_paired.o sabre.o
-	$(CC) $(CFLAGS) $(OPT) $? -o $(PROGRAM_NAME) $(LDFLAGS)
-
-debug:
-	$(MAKE) build "CFLAGS=-Wall -pedantic -g -DDEBUG"
-
diff --git a/README.md b/README.md
index 5e84da3..ab3aa22 100644
--- a/README.md
+++ b/README.md
@@ -1,105 +1,58 @@
-# sabre - A barcode demultiplexing and trimming tool for FastQ files
+> This is a fork of the [original repo](https://github.com/najoshi/sabre). I might be taking this tool into a different direction to what was originally intended
 
-## About
+# sabre
 
-Next-generation sequencing can currently produce hundreds of millions of reads
-per lane of sample and that number increases at a dizzying rate.  Barcoding
-individual sequences for multiple lines or multiple species is a cost-efficient
-method to sequence and analyze a broad range of data.
+> A cellular barcode demultiplexing tool of FASTQ files
 
-Sabre is a tool that will demultiplex barcoded reads into separate files. 
-It will work on both single-end and paired-end data in fastq format.
-It simply compares the provided barcodes with each read and separates
-the read into its appropriate barcode file, after stripping the barcode from
-the read (and also stripping the quality values of the barcode bases).  If
-a read does not have a recognized barcode, then it is put into the unknown file.  
-Sabre also has an option (-m) to allow mismatches of the barcodes.
+## Content
 
-Sabre also supports gzipped file inputs.  Also, since sabre does not use the 
-quality values in any way, it can be used on fasta data that is converted to
-fastq by creating fake quality values.
+- [Install](#install)
+- [Quick start](#quick-start)
+- [Usage](#usage)
 
-Finally, after demultiplexing, sabre outputs a summary of how many records
-went into each barcode file.
+## Install
 
-## Requirements 
+```BASH
+git clone https://github.com/serine/sabre
+cd src
+make
+```
 
-Sabre requires a C compiler; GCC or clang are recommended.  Sabre
-relies on Heng Li's kseq.h, which is bundled with the source.
+## Quick start
 
-Sabre also requires Zlib, which can be obtained at
-<http://www.zlib.net/>.
-
-## Building and Installing Sabre
-
-To build Sabre, enter:
-
-    make
-
-Then, copy or move "sabre" to a directory in your $PATH.
+```BASH
+sabre -f MultiplexRNASeq_S1_R1_001.fastq.gz \
+      -r MultiplexRNASeq_S1_R2_001.fastq.gz \
+      -b barcodes.txt \
+      -c \
+      -u \
+      -m 2 \
+      -l 10 \
+      -a 1 \
+      -s sabre.txt \
+      -t 12
+```
 
 ## Usage
 
-Sabre has two modes to work with both paired-end and single-end
-reads: `sabre se` and `sabre pe`.
-
-Running sabre by itself will print the help:
-
-    sabre
-
-Running sabre with either the "se" or "pe" commands will give help
-specific to those commands:
-
-    sabre se
-    sabre pe
-
-### Sabre Single End (`sabre se`)
-
-`sabre se` takes an input fastq file and an input barcode data file and outputs 
-the reads demultiplexed into separate files using the file names from the data file.
-The barcodes will be stripped from the reads and the quality values of the barcode
-bases will also be removed.  Any reads with unknown barcodes get put into the "unknown" 
-file specified on the command line.  The -m option allows for mismatches in the barcodes.
-
-#### Barcode data file format for single end
-
-    barcode1 barcode1_output_file.fastq
-    barcode2 barcode2_output_file.fastq
-    etc...
-
-Be aware that if you do not format the barcode data file correctly, sabre will not work properly.
-
-#### Example
-
-    sabre se -f input_file.fastq -b barcode_data.txt -u unknown_barcode.fastq
-    sabre se -m 1 -f input_file.fastq -b barcode_data.txt -u unknown_barcode.fastq
-
-### Sabre Paired End (`sabre pe`)
-
-`sabre pe` takes two paired-end files and a barcode data file as input and outputs
-the reads demultiplexed into separate paired-end files using the file names from the 
-data file.  The barcodes will be stripped from the reads and the quality values of the barcode 
-bases will also be removed.  Any reads with unknown barcodes get put into the "unknown" files 
-specified on the command line.  It also has an option (-c) to remove barcodes from both files.  
-Using this option means that if sabre finds a barcode in the first file, it assumes the paired 
-read in the other file has the same barcode and will strip it (along with the quality values).  
-The -m option allows for mismatches in the barcodes.
-
-#### Barcode data file format for paired end
+> This tool is under development and this is very much an alpha version
+> In it's current form the tool is highly customised a particular multiplexing protocol
 
-    barcode1 barcode1_output_file1.fastq barcode1_output_file2.fastq
-    barcode2 barcode2_output_file1.fastq barcode2_output_file2.fastq
-    etc...
+### Cellular barcodes
 
-Be aware that if you do not format the barcode data file correctly, sabre will not work properly.
+In order to demultiplex the use needs to provide `barcodes.txt` file, which is three column tab delimited file
 
-#### Examples
+```
+sample_name group barcode
+```
 
-    sabre pe -f input_file1.fastq -r input_file2.fastq -b barcode_data.txt \
-    -u unknown_barcode1.fastq -w unknown_barcode1.fastq
+currently group is semi-redundant column, it there for a feature that in the development. for most use cases group can equals to barcode
 
-    sabre pe -c -f input_file1.fastq -r input_file2.fastq -b barcode_data.txt \
-    -u unknown_barcode1.fastq -w unknown_barcode1.fastq
+e.g
 
-    sabre pe -m 2 -f input_file1.fastq -r input_file2.fastq -b barcode_data.txt \
-    -u unknown_barcode1.fastq -w unknown_barcode1.fastq
+```
+cntr_rep1    TAAGGCGA        TAAGGCGA
+cntr_rep2    CGTACTAG        CGTACTAG
+treat_rep1   AGGCAGAA        AGGCAGAA
+treat_rep2   TCCTGAGC        TCCTGAGC
+```
diff --git a/docs/definitions.md b/docs/definitions.md
new file mode 100644
index 0000000..b0a8921
--- /dev/null
+++ b/docs/definitions.md
@@ -0,0 +1,37 @@
+define blocks along the read
+
+BARCODE 
+UMI
+READ
+
+then set values to different block
+
+BARCODE = 8
+UMI = 10
+
+## Andrew's suggestion
+
+```
+--input sample_A_R1.fastq.gz:i8{index1},r151{read1},i8{index2}
+```
+
+```
+--fq1 sample_A_R1.fastq.gz:i8{index1},r151{READ1},i8{index2}
+
+--fq2 sample_A_R2.fastq.gz:i8{index1},r151{READ1},i8{index2}
+```
+
+We need to check that BARCODE == index1 in both fq1 and fq2 but also check that index1_fq1 == index1_fq2
+
+```
+--merge 12 merge R1 into R2
+--merge 21 merge R2 into R1
+```
+
+either way resulting read is R1
+
+```
+--fq1 sample_A_R1.fastq.gz:8index1,*index2
+
+--fq2 sample_A_R2.fastq.gz:i8{index1},r151{read2},i8{index2}
+```
diff --git a/docs/modes.md b/docs/modes.md
new file mode 100644
index 0000000..62bca89
--- /dev/null
+++ b/docs/modes.md
@@ -0,0 +1,72 @@
+# Sabre
+
+## Different running modes
+
+DOCS: In each case BARCODE and/or UMI are trimed off and
+put into FASTQ header:
+
+Not sure if I should have:
+
+       BARCODE always has a precedent i.e BARCODE:UMI
+       OR
+       It follows the same structure as per experiment i.e
+       if BARCODE+UMI then BARCODE:UMI
+       else if UMI+BARCODE then UMI:BARCODE
+
+All modes that begin with 3 will return single - R1 file, merging
+R1 read into R2 header and renaming R2 into R1
+
+10 = single-end where R1 has the following structure:
+
+         R1 -->
+         BARCODE+READ
+
+20 = paired-end where R1 and R2 have the following structure:
+
+         R1 -->                 <--R2
+         BARCODE+READ----READ+BARCODE
+
+this mode returns single file (R1) with barcode appended and into R1 header
+
+30 = paired-end where R1 and R2 have the following structure:
+
+         R1 -->     <-R2
+         BARCODE----READ
+
+40 = paired-end where
+
+11 = single-end where R1 has the following structure:
+
+         R1 -->
+         BARCODE+UMI+READ
+
+21 = paired-end where R1 and R2 have the following structure:
+
+         R1 -->                         <--R2
+         BARCODE+UMI+READ----READ+UMI+BARCODE
+
+this mode returns single file (R1) with barcode appended and into R1 header
+
+31 = paired-end where R1 and R2 have the following structure:
+
+         R1 -->         <-R2
+         BARCODE+UMI----READ
+
+NOTE this gives me room for yet another mode e.g 12, 22, 32
+
+12 = sinle-end where R1 has the following structure:
+
+         R1 -->
+         UMI+READ
+
+22 = paired-end where R1 and R2 have the following structure:
+
+         R1 -->         <--R2
+         UMI+READ----READ+UMI
+
+this mode returns single file (R1) with barcode appended and into R1 header
+
+32 = paired-end where R1 and R2 have the following structure:
+
+         R1 --> <-R2
+         UMI----READ
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..feb2a3d
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,50 @@
+# Source, Executable, Includes, Library Defines
+GIT_VERSION := "$(shell git describe --abbrev=7 --always --tags)"
+CC = gcc
+INCL = fastq.h sabre.h
+#SRC = demulti_paired.c demulti_single.c sabre.c utils.c
+SRC = sabre.c usage.c demultiplex.c utils.c fastq.c
+OBJ = $(SRC:.c=.o)
+DSRC=src
+
+#CFLAGS = -Wall -O2 -std=c99 -pedantic -DVERSION=$(VERSION)
+# need to quote GIT_VERSION so that the value gets passed as a string
+CFLAGS = -Wall -O2 -std=gnu99 -pedantic -DVERSION=\"$(GIT_VERSION)\"
+CFLAGSDEV = -Wall -O0 -g -std=gnu99 -DVERSION=\"$(GIT_VERSION)-dev\"
+
+LDFLAGS = -lz -lpthread
+GPROF = -pg
+EXE = sabre
+
+.PHONY: default
+
+default: build
+# a smarter way to have an if statement here instead of explicit grpof target
+# have a look at gcc -M
+
+%.o: %.c
+	$(CC) -c $(CFLAGS) $(SRC)
+
+usage.o: usage.h sabre.h fastq.h
+utils.o: utils.h sabre.h fastq.h
+demultiplex.o: demultiplex.h utils.h sabre.h fastq.h
+sabre.o: sabre.h
+
+build: $(OBJ)
+	$(CC) $(CFLAGS) $(OBJ) -o $(EXE) $(LDFLAGS)
+	#ln -sf $(DSRC)/$(EXE) ../sabre
+
+dev: $(OBJDEV)
+	$(CC) $(CFLAGSDEV) $(SRC) -o $(EXE)-dev $(LDFLAGS)
+
+metrics:
+	$(CC) $(CFLAGSDEV) -o metrics metrics.c $(LDFLAGS)
+
+gprof:
+	$(CC) $(CFLAGS) $(GPROF) $(SRC) -o $(EXE).gprof $(LDFLAGS)
+
+clean:
+	$(RM) $(OBJ) $(EXE) core
+
+clean-all:
+	$(RM) $(OBJ) $(EXE) $(EXE)-dev $(EXE).gprof core gmon.out metrics
diff --git a/src/barcode.c b/src/barcode.c
deleted file mode 100644
index a260281..0000000
--- a/src/barcode.c
+++ /dev/null
@@ -1,24 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-int strncmp_with_mismatch (const char *s1, const char *s2, register size_t n, register size_t mismatch) {
-
-	register unsigned char u1, u2;
-	int cnt=0;
-
-	while (n-- > 0) {
-		u1 = (unsigned char) *s1++;
-		u2 = (unsigned char) *s2++;
-
-		if (u1 != u2) {
-			cnt++;
-			if (cnt > mismatch) return u1 - u2;
-		}
-
-		if (u1 == '\0') return 0;
-	}
-
-	return 0;
-}
diff --git a/src/demulti_paired.c b/src/demulti_paired.c
deleted file mode 100644
index 2acdb11..0000000
--- a/src/demulti_paired.c
+++ /dev/null
@@ -1,334 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <stdlib.h>
-#include <zlib.h>
-#include <stdio.h>
-#include <getopt.h>
-#include <limits.h>
-#include <zlib.h>
-#include "sabre.h"
-#include "kseq.h"
-
-KSEQ_INIT(gzFile, gzread)
-
-
-static struct option paired_long_options[] = {
-	{"pe-file1", required_argument, 0, 'f'},
-	{"pe-file2", required_argument, 0, 'r'},
-	{"barcode-file", required_argument, 0, 'b'},
-	{"unknown-output1", required_argument, 0, 'u'},
-	{"unknown-output2", required_argument, 0, 'w'},
-	{"both-barcodes", optional_argument, 0, 'c'},
-	{"max-mismatch", optional_argument, 0, 'm'},
-	{"quiet", optional_argument, 0, 'z'},
-	{GETOPT_HELP_OPTION_DECL},
-	{GETOPT_VERSION_OPTION_DECL},
-	{NULL, 0, NULL, 0}
-};
-
-void paired_usage (int status) {
-
-	fprintf (stderr, "\nUsage: %s pe -f <paired-end fastq file 1> -r <paired-end fastq file 2> -b <barcode file> -u <unknown barcode output file 1> -w <unknown barcode output file 2>\n\n\
-Options:\n\
--f, --pe-file1, Input paired-end fastq file 1 (required, must have same number of records as pe2)\n\
--r, --pe-file2, Input paired-end fastq file 2 (required, must have same number of records as pe1)\n\
--b, --barcode-file, File with barcode and two output file names per line (required)\n", PROGRAM_NAME);
-
-fprintf (stderr, "-u, --unknown-output1, Output paired-end file 1 that contains records with no barcodes found. (required)\n\
--w, --unknown-output2, Output paired-end file 2 that contains records with no barcodes found. (required)\n\
--c, --both-barcodes, Optional flag that indicates that both fastq files have barcodes.\n\
--m <n>, --max-mismatch <n>, Optional argument that is the maximum number of mismatches allowed in a barcode. Default 0.\n");
-
-fprintf (stderr, "--quiet, don't print barcode matching info\n\
---help, display this help and exit\n\
---version, output version information and exit\n\n");
-
-	exit (status);
-}
-
-int paired_main (int argc, char *argv[]) {
-
-	gzFile pe1=NULL;
-	gzFile pe2=NULL;
-	kseq_t *fqrec1;
-	kseq_t *fqrec2;
-	int l1,l2;
-	FILE* barfile = NULL;
-	FILE* unknownfile1=NULL;
-	FILE* unknownfile2=NULL;
-	int debug=0;
-	int optc;
-	extern char *optarg;
-	char *infn1=NULL;
-	char *infn2=NULL;
-	char *barfn=NULL;
-	char *unknownfn1=NULL;
-	char *unknownfn2=NULL;
-	int both_have_barcodes=0;
-	barcode_data_paired *curr, *head, *temp;
-	char barcode [MAX_BARCODE_LENGTH];
-	char baroutfn1 [MAX_FILENAME_LENGTH];
-	char baroutfn2 [MAX_FILENAME_LENGTH];
-	int num_unknown=0;
-	int total=0;
-	int mismatch=0;
-	int quiet=0;
-
-
-	while (1) {
-		int option_index = 0;
-		optc = getopt_long (argc, argv, "dcf:r:b:u:w:m:z", paired_long_options, &option_index);
-
-		if (optc == -1) break;
-
-		switch (optc) {
-			if (paired_long_options[option_index].flag != 0) break;
-
-			case 'f':
-				infn1 = (char*) malloc (strlen (optarg) + 1);
-				strcpy (infn1, optarg);
-				break;
-
-			case 'r':
-				infn2 = (char*) malloc (strlen (optarg) + 1);
-				strcpy (infn2, optarg);
-				break;
-
-			case 'b':
-				barfn = (char*) malloc (strlen (optarg) + 1);
-				strcpy (barfn, optarg);
-				break;
-
-			case 'u':
-				unknownfn1 = (char*) malloc (strlen (optarg) + 1);
-				strcpy (unknownfn1, optarg);
-				break;
-
-			case 'w':
-				unknownfn2 = (char*) malloc (strlen (optarg) + 1);
-				strcpy (unknownfn2, optarg);
-				break;
-
-			case 'c':
-				both_have_barcodes=1;
-				break;
-
-			case 'm':
-				mismatch = atoi (optarg);
-				break;
-
-			case 'z':
-				quiet=1;
-				break;
-
-			case 'd':
-				debug = 1;
-				break;
-
-			case_GETOPT_HELP_CHAR(paired_usage);
-			case_GETOPT_VERSION_CHAR(PROGRAM_NAME, VERSION, AUTHORS);
-
-			case '?':
-				paired_usage (EXIT_FAILURE);
-				break;
-
-			default:
-				paired_usage (EXIT_FAILURE);
-				break;
-		}
-	}
-
-
-	if (!infn1 || !infn2 || !unknownfn1 || !unknownfn2 || !barfn) {
-		paired_usage (EXIT_FAILURE);
-	}
-
-	if (!strcmp (infn1, infn2) || !strcmp (infn1, unknownfn1) || !strcmp (infn1, unknownfn2) ||
-		!strcmp (infn1, barfn) || !strcmp (infn2, unknownfn1) || !strcmp (infn2, unknownfn2) ||
-		!strcmp (infn2, barfn) || !strcmp (unknownfn1, unknownfn2) || !strcmp (unknownfn1, barfn) ||
-		!strcmp (unknownfn2, barfn)) {
-
-		fprintf (stderr, "Error: Duplicate input and/or output file names.\n");
-		return EXIT_FAILURE;
-	}
-
-	pe1 = gzopen (infn1, "r");
-	if (!pe1) {
-		fprintf (stderr, "Could not open input file 1 '%s'.\n", infn1);
-		return EXIT_FAILURE;
-	}
-
-	pe2 = gzopen (infn2, "r");
-	if (!pe2) {
-		fprintf (stderr, "Could not open input file 2 '%s'.\n", infn2);
-		return EXIT_FAILURE;
-	}
-
-	unknownfile1 = fopen (unknownfn1, "w");
-	if (!unknownfile1) {
-		fprintf (stderr, "Could not open unknown output file 1 '%s'.\n", unknownfn1);
-		return EXIT_FAILURE;
-	}
-
-	unknownfile2 = fopen (unknownfn2, "w");
-	if (!unknownfile2) {
-		fprintf (stderr, "Could not open unknown output file 2 '%s'.\n", unknownfn2);
-		return EXIT_FAILURE;
-	}
-
-	barfile = fopen (barfn, "r");
-	if (!barfile) {
-		fprintf (stderr, "Could not open barcode file '%s'.\n", barfn);
-		return EXIT_FAILURE;
-	}
-
-
-	/* Creating linked list of barcode data */
-	head = NULL;
-	while (fscanf (barfile, "%s%s%s", barcode, baroutfn1, baroutfn2) != EOF) {
-		curr = (barcode_data_paired*) malloc (sizeof (barcode_data_paired));
-		curr->bc = (char*) malloc (strlen(barcode) + 1);
-		strcpy (curr->bc, barcode);
-
-		curr->bcfile1 = fopen (baroutfn1, "w");
-		curr->bcfile2 = fopen (baroutfn2, "w");
-		curr->num_records = 0;
-
-		curr->next = head;
-		head = curr;
-	}
-
-
-	fqrec1 = kseq_init (pe1);
-	fqrec2 = kseq_init (pe2);
-
-	while ((l1 = kseq_read (fqrec1)) >= 0) {
-
-		l2 = kseq_read (fqrec2);
-		if (l2 < 0) {
-			fprintf (stderr, "Error: PE file 2 is shorter than PE file 1. Disregarding rest of PE file 1.\n");
-			break;
-		}
-
-
-		/* Go through all barcode data and check if any match to beginning of read */
-		/* If it does then put read in that barcode's file, otherwise put in unknown file */
-		curr = head;
-		while (curr) {
-			if (strncmp_with_mismatch (curr->bc, fqrec1->seq.s, strlen (curr->bc), mismatch) == 0) {
-				break;
-			}
-
-			curr = curr->next;
-		}
-
-
-		if (curr != NULL) {
-			fprintf (curr->bcfile1, "@%s", fqrec1->name.s);
-			if (fqrec1->comment.l) fprintf (curr->bcfile1, " %s\n", fqrec1->comment.s);
-			else fprintf (curr->bcfile1, "\n");
-
-			fprintf (curr->bcfile1, "%s\n", (fqrec1->seq.s)+strlen(curr->bc));
-
-			fprintf (curr->bcfile1, "+%s", fqrec1->name.s);
-			if (fqrec1->comment.l) fprintf (curr->bcfile1, " %s\n", fqrec1->comment.s);
-			else fprintf (curr->bcfile1, "\n");
-
-			fprintf (curr->bcfile1, "%s\n", (fqrec1->qual.s)+strlen(curr->bc));
-
-
-			fprintf (curr->bcfile2, "@%s", fqrec2->name.s);
-			if (fqrec2->comment.l) fprintf (curr->bcfile2, " %s\n", fqrec2->comment.s);
-			else fprintf (curr->bcfile2, "\n");
-
-			if (!both_have_barcodes) fprintf (curr->bcfile2, "%s\n", fqrec2->seq.s);
-			else fprintf (curr->bcfile2, "%s\n", (fqrec2->seq.s)+strlen(curr->bc));
-
-			fprintf (curr->bcfile2, "+%s", fqrec2->name.s);
-			if (fqrec2->comment.l) fprintf (curr->bcfile2, " %s\n", fqrec2->comment.s);
-			else fprintf (curr->bcfile2, "\n");
-
-			if (!both_have_barcodes) fprintf (curr->bcfile2, "%s\n", fqrec2->qual.s);
-			else fprintf (curr->bcfile2, "%s\n", (fqrec2->qual.s)+strlen(curr->bc));
-
-			curr->num_records += 2;
-		}
-
-		else {
-			fprintf (unknownfile1, "@%s", fqrec1->name.s);
-			if (fqrec1->comment.l) fprintf (unknownfile1, " %s\n", fqrec1->comment.s);
-			else fprintf (unknownfile1, "\n");
-
-			fprintf (unknownfile1, "%s\n", fqrec1->seq.s);
-
-			fprintf (unknownfile1, "+%s", fqrec1->name.s);
-			if (fqrec1->comment.l) fprintf (unknownfile1, " %s\n", fqrec1->comment.s);
-			else fprintf (unknownfile1, "\n");
-
-			fprintf (unknownfile1, "%s\n", fqrec1->qual.s);
-
-
-			fprintf (unknownfile2, "@%s", fqrec2->name.s);
-			if (fqrec2->comment.l) fprintf (unknownfile2, " %s\n", fqrec2->comment.s);
-			else fprintf (unknownfile2, "\n");
-
-			fprintf (unknownfile2, "%s\n", fqrec2->seq.s);
-
-			fprintf (unknownfile2, "+%s", fqrec2->name.s);
-			if (fqrec2->comment.l) fprintf (unknownfile2, " %s\n", fqrec2->comment.s);
-			else fprintf (unknownfile2, "\n");
-
-			fprintf (unknownfile2, "%s\n", fqrec2->qual.s);
-
-			num_unknown += 2;
-		}
-
-		total += 2;
-	}
-
-	if (l1 < 0) {
-		l2 = kseq_read (fqrec2);
-		if (l2 >= 0) {
-			fprintf (stderr, "Error: PE file 1 is shorter than PE file 2. Disregarding rest of PE file 2.\n");
-		}
-	}
-
-
-	if (!quiet) {
-		fprintf (stdout, "\nTotal FastQ records: %d (%d pairs)\n\n", total, total/2);
-		curr = head;
-		while (curr) {
-			fprintf (stdout, "FastQ records for barcode %s: %d (%d pairs)\n", curr->bc, curr->num_records, curr->num_records/2);
-			curr = curr->next;
-		}
-		fprintf (stdout, "\nFastQ records with no barcode match: %d (%d pairs)\n", num_unknown, num_unknown/2);
-		fprintf (stdout, "\nNumber of mismatches allowed: %d\n\n", mismatch);
-	}
-
-
-	kseq_destroy (fqrec1);
-	kseq_destroy (fqrec2);
-	gzclose (pe1);
-	gzclose (pe2);
-	fclose (unknownfile1);
-	fclose (unknownfile2);
-	fclose (barfile);
-
-	free (infn1);
-	free (infn2);
-	free (barfn);
-	free (unknownfn1);
-	free (unknownfn2);
-
-	curr = head;
-	while (curr) {
-		fclose (curr->bcfile1);
-		fclose (curr->bcfile2);
-		free (curr->bc);
-		temp = curr;
-		curr = curr->next;
-		free (temp);
-	}
-
-	return EXIT_SUCCESS;
-}
diff --git a/src/demulti_single.c b/src/demulti_single.c
deleted file mode 100644
index d304410..0000000
--- a/src/demulti_single.c
+++ /dev/null
@@ -1,239 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <stdlib.h>
-#include <zlib.h>
-#include <stdio.h>
-#include <getopt.h>
-#include <limits.h>
-#include <zlib.h>
-#include "sabre.h"
-#include "kseq.h"
-
-KSEQ_INIT(gzFile, gzread)
-
-
-static struct option single_long_options[] = {
-  {"fastq-file", required_argument, 0, 'f'},
-  {"barcode-file", required_argument, 0, 'b'},
-  {"unknown-output", required_argument, 0, 'u'},
-  {"max-mismatch", optional_argument, 0, 'm'},
-  {"quiet", optional_argument, 0, 'z'},
-  {GETOPT_HELP_OPTION_DECL},
-  {GETOPT_VERSION_OPTION_DECL},
-  {NULL, 0, NULL, 0}
-};
-
-void single_usage (int status) {
-  
-  fprintf (stderr, "\nUsage: %s se -f <fastq sequence file> -b <barcode file> -u <unknown barcode output file>\n\
-\n\
-Options:\n\
--f, --fastq-file, Input fastq file (required)\n", PROGRAM_NAME);
-
-  fprintf (stderr, "-b, --barcode-file, File with barcode and output file name per line (required)\n\
--u, --unknown-output, Output file that contains records with no barcodes found. (required)\n\
--m <n>, --max-mismatch <n>, Optional argument that is the maximum number of mismatches allowed in a barcode. Default 0.\n\
---quiet, don't output matching info\n\
---help, display this help and exit\n\
---version, output version information and exit\n\n");
-
-  exit (status);
-}
-
-int single_main (int argc, char *argv[]) {
-
-	gzFile se=NULL;
-	kseq_t *fqrec;
-	int debug=0;
-	int optc;
-	extern char *optarg;
-	FILE* barfile = NULL;
-	FILE* unknownfile=NULL;
-	char *barfn=NULL;
-	char *infn=NULL;
-	char *unknownfn=NULL;
-	barcode_data *curr, *head, *temp;
-	char barcode [MAX_BARCODE_LENGTH];
-	char baroutfn [MAX_FILENAME_LENGTH];
-	int num_unknown=0;
-	int total=0;
-	int mismatch=0;
-	int quiet=0;
-
-	while (1) {
-		int option_index = 0;
-		optc = getopt_long (argc, argv, "df:b:u:m:z", single_long_options, &option_index);
-
-		if (optc == -1) break;
-
-		switch (optc) {
-			if (single_long_options[option_index].flag != 0) break;
-
-			case 'f':
-				infn = (char*) malloc (strlen (optarg) + 1);
-				strcpy (infn, optarg);
-				break;
-
-			case 'b':
-				barfn = (char*) malloc (strlen (optarg) + 1);
-				strcpy (barfn, optarg);
-				break;
-
-			case 'u':
-				unknownfn = (char*) malloc (strlen (optarg) + 1);
-				strcpy (unknownfn, optarg);
-				break;
-
-			case 'm':
-				mismatch = atoi (optarg);
-				break;
-
-			case 'z':
-				quiet=1;
-				break;
-
-			case 'd':
-				debug = 1;
-				break;
-
-			case_GETOPT_HELP_CHAR(single_usage)
-			case_GETOPT_VERSION_CHAR(PROGRAM_NAME, VERSION, AUTHORS);
-
-			case '?':
-				single_usage (EXIT_FAILURE);
-				break;
-
-			default:
-				single_usage (EXIT_FAILURE);
-				break;
-		}
-	}
-
-
-	if (!infn || !barfn) {
-		single_usage (EXIT_FAILURE);
-	}
-
-	if (!strcmp (infn, barfn)) {
-		fprintf (stderr, "Error: Input file is same as barcode file.\n");
-		return EXIT_FAILURE;
-	}
-
-	se = gzopen (infn, "r");
-	if (!se) {
-		fprintf (stderr, "Could not open input file '%s'.\n", infn);
-		return EXIT_FAILURE;
-	}
-
-	barfile = fopen (barfn, "r");
-	if (!barfile) {
-		fprintf (stderr, "Could not open barcode file '%s'.\n", barfn);
-		return EXIT_FAILURE;
-	}
-
-	unknownfile = fopen (unknownfn, "w");
-	if (!unknownfile) {
-		fprintf (stderr, "Could not open unknown output file '%s'.\n", unknownfn);
-		return EXIT_FAILURE;
-	}
-
-
-	/* Set up a linked list of the barcode data */
-	head = NULL;
-	while (fscanf (barfile, "%s%s", barcode, baroutfn) != EOF) {
-		curr = (barcode_data*) malloc (sizeof (barcode_data));
-		curr->bc = (char*) malloc (strlen(barcode) + 1);
-		strcpy (curr->bc, barcode);
-
-		curr->bcfile = fopen (baroutfn, "w");
-		curr->num_records = 0;
-
-		curr->next = head;
-		head = curr;
-	}
-
-	
-	fqrec = kseq_init (se);
-
-	while (kseq_read (fqrec) >= 0) {
-
-		/* Go through linked list of barcode data and compare each barcode */
-		/* with the sequence until a match is found or no match is found for any */
-		curr = head;
-		while (curr) {
-			if (strncmp_with_mismatch (curr->bc, fqrec->seq.s, strlen (curr->bc), mismatch) == 0) {
-				break;
-			}
-
-			curr = curr->next;
-		}
-
-
-		/* If barcode data is found, output to demultiplexed file, else output to unknown file */
-		if (curr != NULL) {
-			fprintf (curr->bcfile, "@%s", fqrec->name.s);
-			if (fqrec->comment.l) fprintf (curr->bcfile, " %s\n", fqrec->comment.s);
-			else fprintf (curr->bcfile, "\n");
-
-			fprintf (curr->bcfile, "%s\n", (fqrec->seq.s)+strlen(curr->bc));
-
-			fprintf (curr->bcfile, "+%s", fqrec->name.s);
-			if (fqrec->comment.l) fprintf (curr->bcfile, " %s\n", fqrec->comment.s);
-			else fprintf (curr->bcfile, "\n");
-
-			fprintf (curr->bcfile, "%s\n", (fqrec->qual.s)+strlen(curr->bc));
-
-			curr->num_records++;
-		}
-
-		else {
-			fprintf (unknownfile, "@%s", fqrec->name.s);
-			if (fqrec->comment.l) fprintf (unknownfile, " %s\n", fqrec->comment.s);
-			else fprintf (unknownfile, "\n");
-
-			fprintf (unknownfile, "%s\n", fqrec->seq.s);
-
-			fprintf (unknownfile, "+%s", fqrec->name.s);
-			if (fqrec->comment.l) fprintf (unknownfile, " %s\n", fqrec->comment.s);
-			else fprintf (unknownfile, "\n");
-
-			fprintf (unknownfile, "%s\n", fqrec->qual.s);
-
-			num_unknown++;
-		}
-
-		total++;
-	}
-
-
-	if (!quiet) {
-		fprintf (stdout, "\nTotal FastQ records: %d\n\n", total);
-		curr = head;
-		while (curr) {
-			fprintf (stdout, "FastQ records for barcode %s: %d\n", curr->bc, curr->num_records);
-			curr = curr->next;
-		}
-		fprintf (stdout, "\nFastQ records with no barcode match: %d\n", num_unknown);
-		fprintf (stdout, "\nNumber of mismatches allowed: %d\n\n", mismatch);
-	}
-
-	kseq_destroy (fqrec);
-	gzclose (se);
-	fclose (barfile);
-	fclose (unknownfile);
-
-	free (infn);
-	free (unknownfn);
-	free (barfn);
-
-	curr = head;
-	while (curr) {
-		fclose (curr->bcfile);
-		free (curr->bc);
-		temp = curr;
-		curr = curr->next;
-		free (temp);
-	}
-
-	return 0;
-}
diff --git a/src/demultiplex.c b/src/demultiplex.c
new file mode 100644
index 0000000..da149e3
--- /dev/null
+++ b/src/demultiplex.c
@@ -0,0 +1,207 @@
+
+/*
+ * set softtab=4
+ * set shiftwidth=4
+ * set expandtab
+ *
+ */
+
+/*
+ * sabre FASTQ files demultiplexing
+ * demultiplex.c: FASTQ demultiplexing
+ *
+ */
+
+#include "demultiplex.h"
+
+// Is m1 a better match than m2?  0 -> yes, use m1.  1 -> no, stick with m2
+int better(match_ret_t m1, match_ret_t m2) {
+    if (m1.cropped<0)
+        return 0;
+    if (m2.cropped<0)
+        return 1;
+    return (m1.cropped + m1.mismatches) < (m2.cropped+m2.mismatches);
+}
+
+// Is m the best possible match?
+int best(match_ret_t m) {
+    return m.cropped==0 && m.mismatches==0;
+}
+
+void write_out(const param_t *params, pthread_mutex_t *out_lock, metrics_t* metrics,
+               match_ret_t best_match, barcode_data_t *best_bc,
+               const char* actl_bc,
+               fq_rec_t *fq_rec1,fq_rec_t *fq_rec2);
+
+void* demult_runner(void *arg) {
+    fq_rec_t *fq_rec1 = (fq_rec_t*) malloc(sizeof(fq_rec_t));
+    fq_rec_t *fq_rec2 = (fq_rec_t*) malloc(sizeof(fq_rec_t));
+
+    init_fq_rec(fq_rec1);
+    init_fq_rec(fq_rec2);
+
+    thread_data_t* thread_data = (thread_data_t*)arg;
+    //int my_line_num;
+
+    /* Get reads, one at a time */
+
+    while(1) {
+
+        // lock reading
+        pthread_mutex_lock(thread_data->in_lock);
+
+        //this is equivalent to if(false), which means this block
+        //is always skipped, unless when there is an error/end of the file
+        if(get_fq_rec(fq_rec1, thread_data->params->fq1_fd)) {
+            // sanity check no more reads
+            pthread_mutex_unlock(thread_data->in_lock);
+            break;
+        }
+
+        if(thread_data->params->paired > 0) {
+            if(get_fq_rec(fq_rec2, thread_data->params->fq2_fd)) {
+                //error out there becuase if reached the end of the file
+                //then we should hit first break, above, since the assumptions
+                //that the files of equal length. If issues with R2 only this is an error
+                fprintf (stderr, "\n\
+                        \n ERROR: R2 file shorter than R1 file.\
+                        \n Stopping here:\
+                        \n %s\
+                        \n",
+                        fq_rec1->name);
+                pthread_mutex_unlock(thread_data->in_lock);
+                exit(1);
+            }
+        }
+
+        // unlock reading
+        // TODO this bit of code for ordered fastq files, implement later?
+        //my_line_num = *(thread_data->line_num);
+        //*thread_data->line_num += 1;
+        pthread_mutex_unlock(thread_data->in_lock);
+
+        // Store a copy of the barcode found in the read (including the mismatches)
+        char *actl_bc = NULL;
+
+        /* Step 1: Find matching barcode */
+        match_ret_t best_match = {-1,-1};
+        barcode_data_t *best_bc = NULL;
+        for (barcode_data_t *curr = thread_data->curr; curr!=NULL; curr = curr->next) {
+            for (int i=0; curr->bc[i]; i++) {
+                match_ret_t mtch = chk_bc_mtch(curr->bc[i], fq_rec1->seq, thread_data->params->mismatch, thread_data->params->max_5prime_crop);
+                if(better(mtch, best_match)) {
+                    //found better match
+                    best_match = mtch;
+                    best_bc = curr;
+                    if (actl_bc) free(actl_bc);
+                    actl_bc = strndup( (fq_rec1->seq)+mtch.cropped, strlen(curr->bc[i]) );
+                    if (best(best_match))
+                        break;
+                }
+            }
+
+            if (best(best_match))
+                break;
+        }
+
+        /* Step 2: Write read out into barcode specific file */
+        //TODO this bit of code to keep fastq files ordered as per original fastq files
+        //which I don't think that needed? at least not at this stage
+        // lock writing
+        //while(*(thread_data->out_line_num) != my_line_num) {
+        //    pthread_cond_wait(thread_data->cv, thread_data->out_lock);
+        //}
+        //*thread_data->out_line_num += 1;
+
+        //pthread_cond_broadcast(thread_data->cv);  // Tell everyone it might be their turn!
+
+        write_out(thread_data->params, thread_data->out_lock, thread_data->metrics, best_match, best_bc, actl_bc, fq_rec1, fq_rec2);
+        if (actl_bc)
+            free(actl_bc);
+
+        thread_data->metrics->total += 2;
+    }
+
+    free(fq_rec1);
+    free(fq_rec2);
+    //free(thread_data); according to valgrind report this line isn't needed since no errors given out..
+    return NULL;
+}
+
+
+void write_out(const param_t *params, pthread_mutex_t *out_lock, metrics_t* metrics,
+               match_ret_t best_match, barcode_data_t *best_bc,
+               const char* actl_bc,
+               fq_rec_t *fq_rec1,fq_rec_t *fq_rec2) {
+
+    char fqread1[MAX_READ_SIZE];
+    char fqread2[MAX_READ_SIZE];
+
+    fqread1[0] = '\0';
+    fqread2[0] = '\0';
+
+    char *umi_idx = NULL;
+
+    if(best_bc != NULL) {
+        //for now assume barcode and umi are in R1 read
+        if(params->umi > 0) {
+
+            const char *actl_umi_idx = (fq_rec1->seq)+strlen(actl_bc)+best_match.cropped;
+
+            if(strlen(actl_umi_idx) < params->min_umi_len) {
+                pthread_mutex_lock(out_lock);
+                fprintf(params->umis_2_short_fd, "%s\t%s\t%zu\t%d\n", fq_rec1->name, actl_umi_idx, strlen(actl_umi_idx), params->min_umi_len);
+                pthread_mutex_unlock(out_lock);
+                return;
+            }
+            else {
+                umi_idx = strdup(actl_umi_idx);
+                umi_idx[params->min_umi_len] = '\0';
+            }
+        }
+
+        if(params->combine > 0 && actl_bc != NULL) {
+            get_merged_fqread(fqread1, fq_rec1, fq_rec2, actl_bc, umi_idx, params->no_comment, best_match.cropped);
+
+            pthread_mutex_lock(out_lock);
+            fputs(fqread1, best_bc->bcfile1);
+            pthread_mutex_unlock(out_lock);
+
+        }
+        else {
+            get_fqread(fqread1, fq_rec1, actl_bc, umi_idx, params->no_comment, best_match.cropped);
+
+            pthread_mutex_lock(out_lock);
+            fputs(fqread1, best_bc->bcfile1);
+
+            if(params->paired > 0) {
+                get_fqread(fqread2, fq_rec1, actl_bc, umi_idx, params->no_comment, best_match.cropped);
+
+                fputs(fqread2, best_bc->bcfile2);
+
+                //dont need to increment buff_cnt, assuming fq_read1 keeps the right count
+                best_bc->num_records += 1;
+            }
+            pthread_mutex_unlock(out_lock);
+        }
+        best_bc->num_records += 1;
+    }
+    else {
+
+        get_fqread(fqread1, fq_rec1, NULL, NULL, params->no_comment, 0);
+
+        pthread_mutex_lock(out_lock);
+        fputs(fqread1, params->unassigned1_fd);
+
+        metrics->num_unknown += 1;
+
+        if(params->paired > 0) {
+            get_fqread(fqread2, fq_rec2, NULL, NULL, params->no_comment, 0);
+
+            fputs(fqread2, params->unassigned2_fd);
+
+            metrics->num_unknown += 1;
+        }
+        pthread_mutex_unlock(out_lock);
+    }
+}
\ No newline at end of file
diff --git a/src/demultiplex.h b/src/demultiplex.h
new file mode 100644
index 0000000..22e3417
--- /dev/null
+++ b/src/demultiplex.h
@@ -0,0 +1,15 @@
+#ifndef DEMULTIPLEX_H
+#define DEMULTIPLEX_H
+
+#include "sabre.h"
+#include "utils.h"
+#include "fastq.h"
+
+#define MAX_READ_SIZE 2048
+//#define MAX_READ_NUMBER 100000
+#define MAX_READ_NUMBER 15000
+#define MAX_READ_BUFFER MAX_READ_SIZE*MAX_READ_NUMBER
+
+void* demult_runner(void *arg);
+
+#endif /*DEMULTIPLEX_H*/
diff --git a/src/fastq.c b/src/fastq.c
new file mode 100644
index 0000000..333ebf5
--- /dev/null
+++ b/src/fastq.c
@@ -0,0 +1,47 @@
+
+#include "fastq.h"
+
+int get_line(gzFile fq_fd, char *line, int buff) {
+
+    char *new_line = gzgets(fq_fd, line, buff);
+
+    if(new_line == NULL) {
+        return -1;
+    }
+
+    int str_len = strlen(new_line);
+
+    if(new_line[str_len-1] != '\n') {
+        fprintf(stderr, "Line too long %d\n", buff);
+    exit(1);
+    }
+
+    new_line[str_len-1] = '\0';
+
+    return 0;
+}
+//user strchr
+int get_fq_rec(fq_rec_t *fq_rec, gzFile fq_fd) {
+
+    int done = get_line(fq_fd, fq_rec->name, LINE_SIZE);
+    done = done || get_line(fq_fd, fq_rec->seq, LINE_SIZE);
+    done = done || get_line(fq_fd, fq_rec->other, LINE_SIZE);
+    done = done || get_line(fq_fd, fq_rec->qual, LINE_SIZE);
+    char *ptr = strchr(fq_rec->name,' ');
+    if(ptr) {
+        *ptr='\0';
+        fq_rec->comment = ptr+1;
+    } else {
+        fq_rec->comment = NULL;
+    }
+    // before writing it out check that comment isn't null
+
+    return done;
+}
+
+void init_fq_rec(fq_rec_t *fq_rec) {
+    fq_rec->name[0] = '\0';
+    fq_rec->seq[0] = '\0';
+    fq_rec->other[0] = '\0';
+    fq_rec->qual[0] = '\0';
+}
diff --git a/src/fastq.h b/src/fastq.h
new file mode 100644
index 0000000..0083621
--- /dev/null
+++ b/src/fastq.h
@@ -0,0 +1,20 @@
+#ifndef FASTQ_H
+#define FASTQ_H
+
+#include "sabre.h"
+
+#define LINE_SIZE 600
+
+typedef struct {
+    char name[LINE_SIZE];
+    char *comment;
+    char seq[LINE_SIZE];
+    char other[LINE_SIZE];
+    char qual[LINE_SIZE];
+} fq_rec_t;
+
+void init_fq_rec(fq_rec_t *fq_rec);
+int get_line(gzFile fq_fd, char *line, int buff);
+int get_fq_rec(fq_rec_t *fq_rec, gzFile fq_fd);
+
+#endif /*FASTQ_H*/
diff --git a/src/kseq.h b/src/kseq.h
index 73600c4..8f9e498 100644
--- a/src/kseq.h
+++ b/src/kseq.h
@@ -1,6 +1,6 @@
 /* The MIT License
 
-   Copyright (c) 2008 Genome Research Ltd (GRL).
+   Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor@live.co.uk>
 
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
@@ -23,9 +23,7 @@
    SOFTWARE.
 */
 
-/* Contact: Heng Li <lh3@sanger.ac.uk> */
-
-/* Last Modified: 12APR2009 */
+/* Last Modified: 2017-02-11 */
 
 #ifndef AC_KSEQ_H
 #define AC_KSEQ_H
@@ -34,47 +32,50 @@
 #include <string.h>
 #include <stdlib.h>
 
-#define KS_SEP_SPACE 0 /* isspace(): \t, \n, \v, \f, \r */
-#define KS_SEP_TAB   1 /* isspace() && !' ' */
-#define KS_SEP_MAX   1
+#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
+#define KS_SEP_TAB   1 // isspace() && !' '
+#define KS_SEP_LINE  2 // line separator: "\n" (Unix) or "\r\n" (Windows)
+#define KS_SEP_MAX   2
 
-#define __KS_TYPE(type_t)                       \
-  typedef struct __kstream_t {                  \
-    char *buf;                                  \
-    int begin, end, is_eof;                     \
-    type_t f;                                   \
-  } kstream_t;
+#define __KS_TYPE(type_t) \
+	typedef struct __kstream_t { \
+		unsigned char *buf; \
+		int begin, end, is_eof; \
+		type_t f; \
+	} kstream_t;
 
+#define ks_err(ks) ((ks)->end < 0)
 #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
 #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
 
-#define __KS_BASIC(type_t, __bufsize)                                   \
-  static inline kstream_t *ks_init(type_t f)                            \
-	{                                                               \
-          kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));	\
-          ks->f = f;                                                    \
-          ks->buf = (char*)malloc(__bufsize);                           \
-          return ks;                                                    \
-	}                                                               \
-        static inline void ks_destroy(kstream_t *ks)                    \
-	{                                                               \
-          if (ks) {                                                     \
-          free(ks->buf);                                                \
-          free(ks);                                                     \
-        }                                                               \
-          }
-
-#define __KS_GETC(__read, __bufsize)                                    \
-  static inline int ks_getc(kstream_t *ks)				\
-	{                                                               \
-          if (ks->is_eof && ks->begin >= ks->end) return -1;            \
-          if (ks->begin >= ks->end) {                                   \
-          ks->begin = 0;                                                \
-          ks->end = __read(ks->f, ks->buf, __bufsize);                  \
-          if (ks->end < __bufsize) ks->is_eof = 1;                      \
-          if (ks->end == 0) return -1;					\
-        }                                                               \
-          return (int)ks->buf[ks->begin++];                             \
+#define __KS_BASIC(type_t, __bufsize) \
+	static inline kstream_t *ks_init(type_t f) \
+	{ \
+		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
+		ks->f = f; \
+		ks->buf = (unsigned char*)malloc(__bufsize); \
+		return ks; \
+	} \
+	static inline void ks_destroy(kstream_t *ks) \
+	{ \
+		if (ks) { \
+			free(ks->buf); \
+			free(ks); \
+		} \
+	}
+
+#define __KS_GETC(__read, __bufsize) \
+	static inline int ks_getc(kstream_t *ks) \
+	{ \
+		if (ks_err(ks)) return -3; \
+		if (ks_eof(ks)) return -1; \
+		if (ks->begin >= ks->end) { \
+			ks->begin = 0; \
+			ks->end = __read(ks->f, ks->buf, __bufsize); \
+			if (ks->end == 0) { ks->is_eof = 1; return -1; } \
+			else if (ks->end < 0) { ks->is_eof = 1; return -3; } \
+		} \
+		return (int)ks->buf[ks->begin++]; \
 	}
 
 #ifndef KSTRING_T
@@ -89,135 +90,154 @@ typedef struct __kstring_t {
 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
 #endif
 
-#define __KS_GETUNTIL(__read, __bufsize)                                \
-  static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
-  {                                                                     \
-   if (dret) *dret = 0;                                                 \
-   str->l = 0;                                                          \
-   if (ks->begin >= ks->end && ks->is_eof) return -1;                   \
-   for (;;) {                                                           \
-             int i;                                                     \
-             if (ks->begin >= ks->end) {                                \
-               if (!ks->is_eof) {                                       \
-               ks->begin = 0;                                           \
-               ks->end = __read(ks->f, ks->buf, __bufsize);             \
-               if (ks->end < __bufsize) ks->is_eof = 1;                 \
-               if (ks->end == 0) break;                                 \
-             } else break;                                              \
-             }                                                          \
-               if (delimiter > KS_SEP_MAX) {                            \
-               for (i = ks->begin; i < ks->end; ++i)                    \
-                 if (ks->buf[i] == delimiter) break;                    \
-             } else if (delimiter == KS_SEP_SPACE) {                    \
-               for (i = ks->begin; i < ks->end; ++i)                    \
-                 if (isspace(ks->buf[i])) break;                        \
-             } else if (delimiter == KS_SEP_TAB) {                      \
-               for (i = ks->begin; i < ks->end; ++i)                    \
-                 if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break;   \
-             } else i = 0; /* never come to here! */                    \
-               if (str->m - str->l < i - ks->begin + 1) {               \
-               str->m = str->l + (i - ks->begin) + 1;                   \
-               kroundup32(str->m);                                      \
-               str->s = (char*)realloc(str->s, str->m);                 \
-             }                                                          \
-               memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
-               str->l = str->l + (i - ks->begin);                       \
-               ks->begin = i + 1;                                       \
-               if (i < ks->end) {                                       \
-               if (dret) *dret = ks->buf[i];                            \
-               break;                                                   \
-             }                                                          \
-             }                                                          \
-               if (str->l == 0) {                                       \
-               str->m = 1;                                              \
-               str->s = (char*)calloc(1, 1);                            \
-             }                                                          \
-               str->s[str->l] = '\0';                                   \
-               return str->l;                                           \
-               }
-
-#define KSTREAM_INIT(type_t, __read, __bufsize)                         \
-  __KS_TYPE(type_t)							\
-  __KS_BASIC(type_t, __bufsize)                                         \
-  __KS_GETC(__read, __bufsize)                                          \
-  __KS_GETUNTIL(__read, __bufsize)
-
-#define __KSEQ_BASIC(type_t)                                            \
-  static inline kseq_t *kseq_init(type_t fd)                            \
-  {                                                                     \
-   kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));                      \
-   s->f = ks_init(fd);                                                  \
-   return s;                                                            \
-   }                                                                    \
-  static inline void kseq_rewind(kseq_t *ks)                            \
-  {                                                                     \
-   ks->last_char = 0;                                                   \
-   ks->f->is_eof = ks->f->begin = ks->f->end = 0;                       \
-   }                                                                    \
-  static inline void kseq_destroy(kseq_t *ks)                           \
-  {                                                                     \
-   if (!ks) return;                                                     \
-   free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
-   ks_destroy(ks->f);                                                   \
-   free(ks);                                                            \
-   }
+#define __KS_GETUNTIL(__read, __bufsize) \
+	static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
+	{ \
+		int gotany = 0; \
+		if (dret) *dret = 0; \
+		str->l = append? str->l : 0; \
+		for (;;) { \
+			int i; \
+			if (ks_err(ks)) return -3; \
+			if (ks->begin >= ks->end) { \
+				if (!ks->is_eof) { \
+					ks->begin = 0; \
+					ks->end = __read(ks->f, ks->buf, __bufsize); \
+					if (ks->end == 0) { ks->is_eof = 1; break; } \
+					if (ks->end == -1) { ks->is_eof = 1; return -3; } \
+				} else break; \
+			} \
+			if (delimiter == KS_SEP_LINE) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (ks->buf[i] == '\n') break; \
+			} else if (delimiter > KS_SEP_MAX) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (ks->buf[i] == delimiter) break; \
+			} else if (delimiter == KS_SEP_SPACE) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (isspace(ks->buf[i])) break; \
+			} else if (delimiter == KS_SEP_TAB) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
+			} else i = 0; /* never come to here! */ \
+			if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
+				str->m = str->l + (i - ks->begin) + 1; \
+				kroundup32(str->m); \
+				str->s = (char*)realloc(str->s, str->m); \
+			} \
+			gotany = 1; \
+			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
+			str->l = str->l + (i - ks->begin); \
+			ks->begin = i + 1; \
+			if (i < ks->end) { \
+				if (dret) *dret = ks->buf[i]; \
+				break; \
+			} \
+		} \
+		if (!gotany && ks_eof(ks)) return -1; \
+		if (str->s == 0) { \
+			str->m = 1; \
+			str->s = (char*)calloc(1, 1); \
+		} else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
+		str->s[str->l] = '\0'; \
+		return str->l; \
+	} \
+	static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
+	{ return ks_getuntil2(ks, delimiter, str, dret, 0); }
+
+#define KSTREAM_INIT(type_t, __read, __bufsize) \
+	__KS_TYPE(type_t) \
+	__KS_BASIC(type_t, __bufsize) \
+	__KS_GETC(__read, __bufsize) \
+	__KS_GETUNTIL(__read, __bufsize)
+
+#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
+
+#define __KSEQ_BASIC(SCOPE, type_t) \
+	SCOPE kseq_t *kseq_init(type_t fd) \
+	{ \
+		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
+		s->f = ks_init(fd); \
+		return s; \
+	} \
+	SCOPE void kseq_destroy(kseq_t *ks) \
+	{ \
+		if (!ks) return; \
+		free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
+		ks_destroy(ks->f); \
+		free(ks); \
+	}
 
 /* Return value:
    >=0  length of the sequence (normal)
    -1   end-of-file
    -2   truncated quality string
-*/
-#define __KSEQ_READ                                                     \
-  static int kseq_read(kseq_t *seq)                                     \
-  {                                                                     \
-   int c;                                                               \
-   kstream_t *ks = seq->f;                                              \
-   if (seq->last_char == 0) { /* then jump to the next header line */   \
-                             while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
-                             if (c == -1) return -1; /* end of file */  \
-                             seq->last_char = c;                        \
-                             } /* the first header char has been read */ \
-   seq->comment.l = seq->seq.l = seq->qual.l = 0;                       \
-   if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1;               \
-   if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0);              \
-   while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
-     if (isgraph(c)) { /* printable non-space character */              \
-     if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */        \
-     seq->seq.m = seq->seq.l + 2;                                       \
-     kroundup32(seq->seq.m); /* rounded to next closest 2^k */          \
-     seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m);               \
-   }                                                                    \
-     seq->seq.s[seq->seq.l++] = (char)c;                                \
-   }                                                                    \
-   }                                                                    \
-     if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
-     seq->seq.s[seq->seq.l] = 0;	/* null terminated string */    \
-     if (c != '+') return seq->seq.l; /* FASTA */                       \
-     if (seq->qual.m < seq->seq.m) {	/* allocate enough memory */	\
-     seq->qual.m = seq->seq.m;                                          \
-     seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);		\
-   }                                                                    \
-     while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
-     if (c == -1) return -2; /* we should not stop here */              \
-     while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)        \
-       if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
-     seq->qual.s[seq->qual.l] = 0; /* null terminated string */		\
-     seq->last_char = 0;	/* we have not come to the next header line */ \
-     if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
-     return seq->seq.l;                                                 \
-     }
-
-#define __KSEQ_TYPE(type_t)						\
-  typedef struct {							\
-    kstring_t name, comment, seq, qual;                                 \
-    int last_char;							\
-    kstream_t *f;							\
-  } kseq_t;
-
-#define KSEQ_INIT(type_t, __read)                                       \
-  KSTREAM_INIT(type_t, __read, 4096)                                    \
-  __KSEQ_TYPE(type_t)							\
-  __KSEQ_BASIC(type_t)                                                  \
-  __KSEQ_READ
+   -3   error reading stream
+ */
+#define __KSEQ_READ(SCOPE) \
+	SCOPE int kseq_read(kseq_t *seq) \
+	{ \
+		int c,r; \
+		kstream_t *ks = seq->f; \
+		if (seq->last_char == 0) { /* then jump to the next header line */ \
+			while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '@'); \
+			if (c < 0) return c; /* end of file or error*/ \
+			seq->last_char = c; \
+		} /* else: the first header char has been read in the previous call */ \
+		seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
+		if ((r=ks_getuntil(ks, 0, &seq->name, &c)) < 0) return r;  /* normal exit: EOF or error */ \
+		if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
+		if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
+			seq->seq.m = 256; \
+			seq->seq.s = (char*)malloc(seq->seq.m); \
+		} \
+		while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '+' && c != '@') { \
+			if (c == '\n') continue; /* skip empty lines */ \
+			seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
+			ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
+		} \
+		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
+		if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
+			seq->seq.m = seq->seq.l + 2; \
+			kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
+			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
+		} \
+		seq->seq.s[seq->seq.l] = 0;	/* null terminated string */ \
+		seq->is_fastq = (c == '+'); \
+		if (!seq->is_fastq) return seq->seq.l; /* FASTA */ \
+		if (seq->qual.m < seq->seq.m) {	/* allocate memory for qual in case insufficient */ \
+			seq->qual.m = seq->seq.m; \
+			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
+		} \
+		while ((c = ks_getc(ks)) >= 0 && c != '\n'); /* skip the rest of '+' line */ \
+		if (c == -1) return -2; /* error: no quality string */ \
+		while ((c = ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l)); \
+		if (c == -3) return -3; /* stream error */ \
+		seq->last_char = 0;	/* we have not come to the next header line */ \
+		if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
+		return seq->seq.l; \
+	}
+
+#define __KSEQ_TYPE(type_t) \
+	typedef struct { \
+		kstring_t name, comment, seq, qual; \
+		int last_char, is_fastq; \
+		kstream_t *f; \
+	} kseq_t;
+
+#define KSEQ_INIT2(SCOPE, type_t, __read) \
+	KSTREAM_INIT(type_t, __read, 16384) \
+	__KSEQ_TYPE(type_t) \
+	__KSEQ_BASIC(SCOPE, type_t) \
+	__KSEQ_READ(SCOPE)
+
+#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
+
+#define KSEQ_DECLARE(type_t) \
+	__KS_TYPE(type_t) \
+	__KSEQ_TYPE(type_t) \
+	extern kseq_t *kseq_init(type_t fd); \
+	void kseq_destroy(kseq_t *ks); \
+	int kseq_read(kseq_t *seq);
 
 #endif
diff --git a/src/metrics.c b/src/metrics.c
new file mode 100644
index 0000000..646b3ea
--- /dev/null
+++ b/src/metrics.c
@@ -0,0 +1,187 @@
+#include "utils.h"
+
+/*
+   gcc -Wall -O2 -std=c99 -o metrics metrics.c -lz
+ */
+
+#define BARCODE_ARRAY 1000000
+
+//int chk_bc_arr(const char *arr, char *bc);
+
+/*
+   WISDOM char **arr and char *arr[] mean the same things
+   but later one more informative
+ */
+
+int chk_bc_arr(barcodes_t *arr, char *bc) {
+
+    int i = 0;
+    while(arr[i].bc != 0) {
+
+        if(strcmp(bc, arr[i].bc) == 0) {
+            return i;
+        }
+
+        i += 1;
+    }
+
+    arr[i].bc = strdup(bc);
+
+    // error handling
+    if(i == BARCODE_ARRAY-1) {
+        fprintf(stderr, "ERROR: gone too far in the array\n");
+        exit(1);
+    }
+
+    // could also do initialisation of the next element here
+    // arr[i+1]=0;
+    return i;
+}
+
+int chk_umi_arr(umis_t *arr, char *bc) {
+
+    int i = 0;
+    while(arr[i].bc != 0) {
+
+        if(strcmp(bc, arr[i].bc) == 0) {
+            return i;
+        }
+
+        i += 1;
+    }
+
+    arr[i].bc = strdup(bc);
+    arr[i].len = strlen(bc);
+
+    if(i == BARCODE_ARRAY-1) {
+        fprintf(stderr, "ERROR: gone too far in the array\n");
+        exit(1);
+    }
+    return i;
+}
+
+int umi_n_cmp(const void *a1, const void *b1) {
+
+    const umis_t* a=a1;
+    const umis_t* b=b1;
+
+    return a->cnts - b->cnts;
+}
+
+int bc_n_cmp(const void *a1, const void *b1) {
+
+    const barcodes_t* a=a1;
+    const barcodes_t* b=b1;
+
+    return a->cnts - b->cnts;
+}
+
+int main (int argc, char *argv[]) {
+
+    if (argc <= 2) {
+        fprintf(stderr, "\n\
+                         \n  Usage:   metrics <fastq_file> <mode>\
+                         \n\
+                         \n  Options:\
+                         \n\
+                         \n           mode INT     [0|1]; 0 = sample barcode, 1 = umis barcodes\
+                         \n\
+                         \n");
+        exit (EXIT_SUCCESS);
+    }
+
+    gzFile pe1=NULL;
+    pe1 = gzopen(argv[1], "r");
+    kseq_t *fqrec1;
+    int l1;
+    int mode = atoi(argv[2]);
+
+    fqrec1 = kseq_init(pe1);
+
+    /*
+       WISDOM these tow are actually different things
+       char barcodes[BARCODE_ARRAY][BARCODE];
+       char *barcodes[];
+     */
+
+    //char **barcodes = calloc(BARCODE_ARRAY, sizeof(char*));
+    //int *bc_cnts = calloc(BARCODE_ARRAY, sizeof(int));
+
+    barcodes_t *barcodes;
+    barcodes = calloc(BARCODE_ARRAY, sizeof(barcodes_t));
+
+    umis_t *umis;
+    umis = calloc(BARCODE_ARRAY, sizeof(umis_t));
+
+    /* Get reads, one at a time */
+    while ((l1 = kseq_read(fqrec1)) >= 0) {
+
+        char *last;
+        char *last2;
+
+        char *p = strtok(fqrec1->name.s, ":");
+
+        while (p != NULL) {
+            last2 = last;
+            last = p;
+            p = strtok(NULL, ":");
+        }
+
+        if(mode == 0) {
+            int bc_idx = chk_bc_arr(barcodes, last2);
+            barcodes[bc_idx].cnts += 1;
+        }
+        else if(mode == 1) {
+
+            int umi_idx = chk_umi_arr(umis, last);
+            umis[umi_idx].cnts += 1;
+        }
+    }
+
+    if(mode == 0) {
+
+        int n = 0;
+        while(barcodes[n].bc != 0) {
+            n++;
+        }
+        qsort(barcodes, n, sizeof(barcodes_t), bc_n_cmp);
+
+        /*
+           WISDOM this is to limit the right scope for i
+           */
+
+        int i = 0;
+        while(barcodes[i].bc != 0) {
+            fprintf(stdout, "%s\t%d\n", barcodes[i].bc, barcodes[i].cnts);
+            i++;
+        }
+    }
+    else if(mode == 1) {
+
+        int n = 0;
+        while(umis[n].bc != 0) {
+            n++;
+        }
+        qsort(umis, n, sizeof(umis_t), umi_n_cmp);
+
+        int i = 0;
+        while(umis[i].bc != 0) {
+            fprintf(stdout, "%s\t%d\t%d\n", umis[i].bc, umis[i].len, umis[i].cnts);
+            i++;
+        }
+    }
+
+    gzclose(pe1);
+    kseq_destroy(fqrec1);
+
+    return EXIT_SUCCESS;
+}
+//TODO use struct instead
+//used qsort
+//write a function to sort  e.g bubbleSort
+//actually sorting function should be simpler then bubbleSort
+//it should return -1, 0 or 1. OR -1 or 1.
+//
+//barcodes_t barcodes[size];
+//
+//barcodes[i].cnts +=1;
diff --git a/src/sabre.c b/src/sabre.c
index 4e066bb..a41ce4f 100644
--- a/src/sabre.c
+++ b/src/sabre.c
@@ -1,54 +1,407 @@
-#include <assert.h>
-#include <ctype.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <zlib.h>
-#include <stdio.h>
-#include <getopt.h>
-#include <string.h>
+
 #include "sabre.h"
+#include "utils.h"
+#include "usage.h"
+#include "demultiplex.h"
+
+FILE* my_fopen(const char* fname, int gz) {
+    static char* compressor = NULL;
 
-void main_usage (int status) {
+    if (gz) {
 
-	fprintf (stdout, "\nUsage: %s <command> [options]\n\
-\n\
-Command:\n\
-pe\tpaired-end barcode de-multiplexing\n\
-se\tsingle-end barcode de-multiplexing\n\
-\n\
---help, display this help and exit\n\
---version, output version information and exit\n\n", PROGRAM_NAME);
+        if (!compressor) {
+            // Guess whether to use pigz or gzip
+            char tmp[100];
+            FILE* fin = popen("pigz --version 2>&1", "r");
+            char* str = fgets(tmp, 50, fin);
+            int found = strncmp("pigz",tmp,4)==0;
+            compressor = str && found ? "pigz -p 2" : "gzip";
+        }
 
-	exit (status);
+        char command[2048];
+        sprintf(command, "%s > %s", compressor, fname);
+        FILE* ret = popen(command, "w");
+        return ret;
+    } else {
+        return fopen(fname, "w");
+    }
 }
 
-int main (int argc, char *argv[]) {
-	int retval=0;
+int main(int argc, char *argv[]) {
+
+    if(argc < 2) {
+        usage(EXIT_SUCCESS);
+    }
+
+    //more about getopts http://www.informit.com/articles/article.aspx?p=175771&seqNum=3
+    static struct option paired_long_options[] = {
+        {"fq1", required_argument, NULL, 'f'},
+        {"fq2", required_argument, NULL, 'r'},
+        {"barcodes", required_argument, NULL, 'b'},
+        {"unassinged1", required_argument, NULL, 'z'},
+        {"unassinged2", required_argument, NULL, 'w'},
+        {"combine", optional_argument, NULL, 'c'},
+        {"umi", optional_argument, NULL, 'u'},
+        {"max-mismatch", required_argument, 0, 'm'},
+        {"min-umi-len", required_argument, 0, 'l'},
+        {"max-5prime-crop", required_argument, 0, 'a'},
+        {"stats", required_argument, NULL, 's'},
+        {"no-comment", no_argument, 0, 'n'},
+        {"threads", optional_argument, 0, 't'},
+        {"gz-out", optional_argument, NULL, 'g'},
+        {"version", optional_argument, NULL, 'v'},
+        {"help", optional_argument, NULL, 'h'},
+        {"story", optional_argument, NULL, 'o'},
+        //{"quiet", no_argument, 0, 'z'},
+        {NULL, 0, NULL, 0}
+    };
+
+    // this is on the stack so no need to malloc
+    // stack gets cleaned when function exits,
+    // because of that no need to free either
+    param_t params;
+    set_default_params(&params);
+
+    metrics_t metrics;
+    metrics. num_unknown = 0;
+    metrics.total = 0;
+
+    //clock_t begin = clock();
+    time_t start, end;
+    start = time(NULL);
+
+    char *fq1_fn=NULL;
+    char *fq2_fn=NULL;
+
+    FILE* bc_fd;
+    char *bc_fn=NULL;
+
+    char *unassigned1_fn=NULL;
+    char *unassigned2_fn=NULL;
+    char *umis_2_short_fn=strdup("umis_too_short.txt");
+
+    FILE* log_file;
+    char *log_fn=strdup("stats.txt");
+
+    int optc;
+    extern char *optarg;
+
+    barcode_data_t *curr, *head, *temp;
+
+    int threads=4;
+
+    while (1) {
+        int option_index = 0;
+        //colon after a flag means should have arguments and no colon means just a flag i.e bool, no args after it
+        optc = getopt_long (argc, argv, "dnucvogf:r:b:z:w:m:s:l:z:a:t:", paired_long_options, &option_index);
+
+        if (optc == -1) break;
+
+        switch (optc) {
+
+            case 'f':
+            fq1_fn = (char*) malloc (strlen (optarg) + 1);
+            strcpy (fq1_fn, optarg);
+            break;
+
+            case 'r':
+            fq2_fn = (char*) malloc (strlen (optarg) + 1);
+            strcpy (fq2_fn, optarg);
+            break;
+
+            case 'b':
+            bc_fn = (char*) malloc (strlen (optarg) + 1);
+            strcpy (bc_fn, optarg);
+            break;
+
+            case 'z':
+            unassigned1_fn = (char*) malloc (strlen (optarg) + 1);
+            strcpy (unassigned1_fn, optarg);
+            break;
+
+            case 'w':
+            unassigned2_fn = (char*) malloc (strlen (optarg) + 1);
+            strcpy (unassigned2_fn, optarg);
+            break;
+
+            case 'c':
+            params.combine=1;
+            break;
+
+            case 'u':
+            params.umi=1;
+            break;
+
+            case 'm':
+            params.mismatch = atoi (optarg);
+            break;
+
+            case 'l':
+            params.min_umi_len = atoi (optarg);
+            break;
+
+            case 'a':
+            params.max_5prime_crop = atoi (optarg);
+            break;
+
+            case 'n':
+            params.no_comment = 1;
+            break;
+
+            case 'g':
+            params.gz_out = 1;
+            break;
+
+            case 't':
+            threads = atoi (optarg);
+            break;
+
+            case 's':
+            log_fn = (char*) malloc (strlen (optarg) + 1);
+            strcpy (log_fn, optarg);
+            break;
+
+            case 'v':
+            version(EXIT_SUCCESS);
+            break;
+
+            //NOTE if user requrested the help menu i.e --help then
+            //return success for all other cases below while help menu
+            //is printed it wasn't intended by user (or at least we don't know that)
+            //and therefore exit code - fail
+            case 'h':
+            usage(EXIT_SUCCESS);
+            break;
+
+            case 'o':
+            little_story(EXIT_SUCCESS);
+            break;
+
+            case '*':
+            usage(EXIT_FAILURE);
+            break;
+
+            default:
+            usage(EXIT_FAILURE);
+            break;
+        }
+    }
+
+    // TODO check that what's requre if not run usage
+
+    params.fq1_fd = gzopen(fq1_fn, "r");
+    if(!params.fq1_fd) {
+        fprintf(stderr, "ERROR: Could not open input file R1 '%s'.\n", fq1_fn);
+        exit(EXIT_FAILURE);
+    }
+
+    params.fq2_fd = gzopen(fq2_fn, "r");
+    if(!params.fq2_fd) {
+        fprintf(stderr, "ERROR: Could not open input file R2 '%s'.\n", fq2_fn);
+        exit(EXIT_FAILURE);
+    }
+
+    if (!unassigned1_fn) {
+        unassigned1_fn= params.gz_out ? strdup("unassigned_R1.fq.gz") : strdup("unassigned_R1.fq");
+    }
+    if (!unassigned2_fn) {
+        unassigned2_fn= params.gz_out ? strdup("unassigned_R2.fq.gz") : strdup("unassigned_R2.fq");
+    }
+    params.unassigned1_fd = my_fopen(unassigned1_fn, params.gz_out);
+    params.unassigned2_fd = my_fopen(unassigned2_fn, params.gz_out);
+    params.umis_2_short_fd = fopen(umis_2_short_fn, "a");
+
+    // ? where does this goes?
+    fprintf(params.umis_2_short_fd, "name\tumi\tlen\tmin_len\n");
+
+    //TODO plugin sanity_chk here
+
+    if(params.fq2_fd) {
+        params.paired = 1;
+    }
+
+    fprintf(stderr, "\n\
+                     \n  Running: %s\
+                     \n  Command line args:\
+                     \n      --fq1 %s\
+                     \n      --fq2 %s\
+                     \n      --barcodes %s\
+                     \n      --unassigned_R1 %s\
+                     \n      --unassigned_R2 %s\
+                     \n      --combine %d\
+                     \n      --umi %d\
+                     \n      --max-mismatch %d\
+                     \n      --min-umi-len %d\
+                     \n      --max-5prime-crop %d\
+                     \n      --no-comment %d\
+                     \n      --stats %s\
+                     \n      --threads %d\
+                     \n\
+                     \n  In Progess...\
+                     \n\
+                     \n", PROGRAM_NAME,\
+                     fq1_fn, fq2_fn,\
+                     bc_fn,\
+                     unassigned1_fn, unassigned2_fn,\
+                     params.combine, params.umi,\
+                     params.mismatch, params.min_umi_len,
+                     params.max_5prime_crop, params.no_comment,\
+                     log_fn, threads);
+
+    char *bcout_fn1 = NULL;
+    char *bcout_fn2 = NULL;
+    /* Creating linked list of barcode data */
+    // https://www.hackerearth.com/practice/data-structures/linked-list/singly-linked-list/tutorial/
+    // where each node is represents one barcode from the barcode file
+    bc_fd = fopen(bc_fn, "r");
+    if (!bc_fd) {
+        fprintf(stderr, "ERROR: Unable to barcode file\n");
+        exit(EXIT_FAILURE);
+    }
+
+    head = NULL;
+    curr = NULL;
+
+    char line_buff[1024];
+    int max_items = 6;
+    while(fgets(line_buff, 1024, bc_fd)) {
+        curr = (barcode_data_t*) calloc(1,sizeof(barcode_data_t));
+
+        char *p = strtok(line_buff, "\t");
+        char *s_name = strdup(p);
+
+        p = strtok(NULL, "\t");
+        curr->bc_grp = strdup(p);
+
+        bcout_fn1 = (char *) malloc(MAX_FILENAME_LENGTH*2);
+        bcout_fn1[0] = '\0';
+        get_bc_fn(&bcout_fn1, s_name, curr->bc_grp, 1, params.gz_out);
+        curr->bcfile1 = my_fopen(_mkdir(bcout_fn1), params.gz_out);
+
+        if(params.paired > 0 && params.combine < 0) {
+            bcout_fn2 = (char *) malloc(MAX_FILENAME_LENGTH*2);
+            bcout_fn2[0] = '\0';
+            get_bc_fn(&bcout_fn2, s_name, curr->bc_grp, 2, params.gz_out);
+            curr->bcfile2 = my_fopen(_mkdir(bcout_fn2), params.gz_out);
+        }
+
+        //TODO for hardcode max limit of items in the barcodes file to 6
+        curr->bc = calloc(max_items, sizeof(void*));
+
+        int i=0;
+        while(i <= max_items && (p = strtok(NULL, "\t\n"))) {
+            // remove the token, new line char
+            curr->bc[i] = strdup(p);
+            fprintf(stdout, "  BC %s ", curr->bc[i]);
+            i++;
+        }
+        fprintf(stdout, "\n");
+
+        curr->num_records = 0;
+        curr->next = head;
+        head = curr;
+    }
+
+    free(bcout_fn1);
+    free(bcout_fn2);
+    free(bc_fn);
+    free(fq1_fn);
+    free(fq2_fn);
+    free(unassigned1_fn);
+    free(unassigned2_fn);
+    //free(umis_2_short_fn);
+
+    // Threading
+    pthread_t tid[threads];
+    pthread_mutex_t in_lock;
+    pthread_mutex_t out_lock;
+    pthread_cond_t cv;
+    int line_num = 0;
+    int out_line_num = 0;
+
+    pthread_mutex_init(&in_lock, NULL);
+    pthread_mutex_init(&out_lock, NULL);
+    pthread_cond_init(&cv, NULL);
+
+    thread_data_t thread_data[threads];
+
+    for(int i=0; i < threads; i++) {
+
+        thread_data[i].params = &params;
+        thread_data[i].curr = curr;
+        thread_data[i].metrics = &metrics;
+        thread_data[i].id = i;
+        thread_data[i].in_lock = &in_lock;
+        thread_data[i].out_lock = &out_lock;
+        thread_data[i].line_num = &line_num;
+        thread_data[i].out_line_num = &out_line_num;
+        thread_data[i].cv = &cv;
+
+        pthread_create(&(tid[i]), NULL, &demult_runner, thread_data+i);
+    }
+
+    for(int i=0; i < threads; i++) {
+        pthread_join(tid[i], NULL);
+    }
+    printf("Threads all done\n");
+
+    pthread_mutex_destroy(&in_lock);
+    pthread_mutex_destroy(&out_lock);
+
+    //if (!log_fn) { is this better?
+    if (log_fn == NULL) {
+        log_file = stdout;
+    }
+    else {
+        log_file = fopen(log_fn, "w");
+    }
+
+    fprintf (log_file, "Barcode\tN_records\tN_pairs\tP_pairs\n");
+    curr = head;
+    int total_pairs = metrics.total/2;
+
+    while(curr) {
+
+        int n_pairs = curr->num_records/2;
+        float percent_pairs = (float) n_pairs/total_pairs;
+
+        fprintf(log_file,"%s\t%d\t%d\t%.2f\n", curr->bc_grp, curr->num_records, n_pairs, percent_pairs);
+
+        curr = curr->next;
+    }
+
+    int unknown_pairs = metrics.num_unknown/2;
+    float percent_unknown = (float) unknown_pairs/total_pairs;
+    float tot_chk = (float) total_pairs/total_pairs;
 
-	if (argc < 2 || (strcmp (argv[1],"pe") != 0 && strcmp (argv[1],"se") != 0 && strcmp (argv[1],"--version") != 0 && strcmp (argv[1],"--help") != 0)) {
-		main_usage (EXIT_FAILURE);
-	}
+    fprintf(log_file, "unassigned\t%d\t%d\t%.2f\n", metrics.num_unknown, unknown_pairs, percent_unknown);
+    fprintf(log_file, "total\t%d\t%d\t%.2f\n", metrics.total, total_pairs, tot_chk);
 
-	if (strcmp (argv[1],"--version") == 0) {
-		fprintf(stdout, "%s version %0.3f\nCopyright (c) 2011 The Regents of University of California, Davis Campus.\n%s is free software and comes with ABSOLUTELY NO WARRANTY.\nDistributed under the MIT License.\n\nWritten by %s\n", PROGRAM_NAME, VERSION, PROGRAM_NAME, AUTHORS);
+    end = time(NULL);
+    fprintf(stderr, "\n All done :) \
+                     \n It took %.2f minutes\n",
+                     difftime(end, start)/60);
 
-		exit (EXIT_SUCCESS);
 
-	}
+    fclose(bc_fd);
+    fclose(log_file);
+    params_destroy(&params);
 
-	else if (strcmp (argv[1],"--help") == 0) {
-		main_usage (EXIT_SUCCESS);
-	}
+    free(log_fn);
 
-	else if (strcmp (argv[1],"pe") == 0) {
-		retval = paired_main (argc, argv);
-		return (retval);
-	}
+    curr = head;
+    while (curr) {
+        fclose(curr->bcfile1);
+        if (curr->bcfile2)
+            fclose(curr->bcfile2);
 
-	else if (strcmp (argv[1],"se") == 0) {
-		retval = single_main (argc, argv);
-		return (retval);
-	}
+        free (curr->bc_grp);
+        free (curr->bc);
+        temp = curr;
+        curr = curr->next;
+        free (temp);
+    }
 
-	return 0;
+    // good read :)
+    little_story(EXIT_SUCCESS);
 }
diff --git a/src/sabre.h b/src/sabre.h
index 693af37..d6a8f33 100644
--- a/src/sabre.h
+++ b/src/sabre.h
@@ -1,67 +1,95 @@
-#ifndef SABRE_H
-#define SABRE_H
+#ifndef _SABRE_H
+#define _SABRE_H
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <zlib.h>
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <libgen.h>
+#include <time.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 
 #ifndef PROGRAM_NAME
 #define PROGRAM_NAME "sabre"
 #endif
 
 #ifndef AUTHORS
-#define AUTHORS "Nikhil Joshi, UC Davis Bioinformatics Core\n"
-#endif
-
-#ifndef VERSION
-#define VERSION 0.0
+#define AUTHORS "\n\
+	         \n      Nikhil Joshi, UC Davis Bioinformatics Core\
+                 \n      Kirill Tsyganov, Monash Bioinformatics Platform\
+                 \n"
 #endif
 
-/* Options drawn from GNU's coreutils/src/system.h */
-/* These options are defined so as to avoid conflicting with option
-values used by commands */
-enum {
-  GETOPT_HELP_CHAR = (CHAR_MIN - 2),
-  GETOPT_VERSION_CHAR = (CHAR_MIN - 3)
-};
-#define GETOPT_HELP_OPTION_DECL \
-"help", no_argument, NULL, GETOPT_HELP_CHAR
-#define GETOPT_VERSION_OPTION_DECL \
-"version", no_argument, NULL, GETOPT_VERSION_CHAR
-#define case_GETOPT_HELP_CHAR(Usage_call) \
-case GETOPT_HELP_CHAR: \
-Usage_call(EXIT_SUCCESS); \
-break;
-#define case_GETOPT_VERSION_CHAR(Program_name, Version, Authors) \
-case GETOPT_VERSION_CHAR: \
-fprintf(stdout, "%s version %0.3f\nCopyright (c) 2011 The Regents " \
-"of University of California, Davis Campus.\n" \
-"%s is free software and comes with ABSOLUTELY NO WARRANTY.\n"\
-"Distributed under the MIT License.\n\nWritten by %s\n", \
-Program_name, Version, Program_name, Authors); \
-exit(EXIT_SUCCESS); \
-break;
-/* end code drawn from system.h */
+//https://semver.org/
 
 #define MAX_BARCODE_LENGTH 100
 #define MAX_FILENAME_LENGTH 200
 
-
-typedef struct listel {
-	char* bc;
-	int num_records;
-	FILE* bcfile;
-	struct listel *next;
-} barcode_data;
+//NOTE more info on struc and typedef
+//https://stackoverflow.com/questions/1675351/typedef-struct-vs-struct-definitions
+typedef struct actl_bc_cnt {
+    char *bc;
+    int cnt;
+    struct actl_bc_cnt *next;
+} actl_bc_cnt;
 
 typedef struct listel_p {
-	char* bc;
+	//TODO this is future implementation
+	//char **bc;
+	//typedef struct barcode_t *b
+	// this is a pointer to an array of pointers
+	char **bc;
+	char *bc_grp;
 	int num_records;
 	FILE* bcfile1;
 	FILE* bcfile2;
+        struct actl_bc_cnt *actl_bc_cnt;
 	struct listel_p *next;
-} barcode_data_paired;
+} barcode_data_t;
+
+//TODO this is for future implementation
+//typdef strcut {
+//  char *bc;
+//  barcode_data_t *bacrcode_data;
+//} barcode_t
+
+typedef struct {
+    gzFile fq1_fd;
+    gzFile fq2_fd;
+    FILE* unassigned1_fd;
+    FILE* unassigned2_fd;
+    FILE* umis_2_short_fd;
+    int mismatch;
+    int combine;
+    int umi;
+    int paired;
+    int min_umi_len;
+    int max_5prime_crop;
+    int no_comment;
+    int gz_out;
+} param_t;
+
+typedef struct {
+    int num_unknown;
+    int total;
+} metrics_t;
 
+typedef struct {
+    int id;
+    const param_t* params;
+    barcode_data_t* curr;
+    metrics_t* metrics;
+    pthread_mutex_t *in_lock, *out_lock;
+    pthread_cond_t *cv;
+    volatile int *line_num;  // Pointer to line number.  Access to this protected by in_lock
+    volatile int *out_line_num;  // Pointer to line number output.  Protected by out_lock
+} thread_data_t;
 
-/* Function Prototypes */
-int single_main (int argc, char *argv[]);
-int paired_main (int argc, char *argv[]);
-int strncmp_with_mismatch (const char *s1, const char *s2, register size_t n, register size_t mismatch);
 
-#endif /*SABRE_H*/
+#endif /*_SABRE_H*/
diff --git a/src/sanity_check.c b/src/sanity_check.c
new file mode 100644
index 0000000..c6ffb1c
--- /dev/null
+++ b/src/sanity_check.c
@@ -0,0 +1,50 @@
+
+void* sanity_chk(void *arg) {
+    if (!fq1 || !fq2 || !unknownfn1 || !unknownfn2 || !barfn) {
+        paired_usage (EXIT_FAILURE);
+    }
+
+    if (!strcmp (fq1, fq2) || !strcmp (fq1, unknownfn1) || !strcmp (fq1, unknownfn2) ||
+            !strcmp (fq1, barfn) || !strcmp (fq2, unknownfn1) || !strcmp (fq2, unknownfn2) ||
+            !strcmp (fq2, barfn) || !strcmp (unknownfn1, unknownfn2) || !strcmp (unknownfn1, barfn) ||
+            !strcmp (unknownfn2, barfn)) {
+
+        fprintf (stderr, "ERROR: Duplicate input and/or output file names.\n");
+        return EXIT_FAILURE;
+    }
+
+    pe1 = gzopen (fq1, "r");
+    if (!pe1) {
+        fprintf (stderr, "ERROR: Could not open input file 1 '%s'.\n", fq1);
+        return EXIT_FAILURE;
+    }
+
+    pe2 = gzopen (fq2, "r");
+    if (!pe2) {
+        fprintf (stderr, "ERROR: Could not open input file 2 '%s'.\n", fq2);
+        return EXIT_FAILURE;
+    }
+
+    unknownfile1 = gzopen(unknownfn1, "wb");
+    if (!unknownfile1) {
+        fprintf (stderr, "ERROR: Could not open unknown output file 1 '%s'.\n", unknownfn1);
+        return EXIT_FAILURE;
+    }
+
+    unknownfile2 = gzopen(unknownfn2, "wb");
+    if (!unknownfile2) {
+        fprintf (stderr, "Could not open unknown output file 2 '%s'.\n", unknownfn2);
+        return EXIT_FAILURE;
+    }
+
+    barfile = fopen (barfn, "r");
+    if (!barfile) {
+        fprintf (stderr, "Could not open barcode file '%s'.\n", barfn);
+        return EXIT_FAILURE;
+    }
+
+    if(threads < 0) {
+        fprintf(stderr, "WARNING: Negative number of threads detected %d, setting threads to 1\n", threads);
+        threads = 1;
+    }
+}
diff --git a/src/usage.c b/src/usage.c
new file mode 100644
index 0000000..550326b
--- /dev/null
+++ b/src/usage.c
@@ -0,0 +1,80 @@
+
+#include "usage.h"
+
+void usage(int status) {
+
+    fprintf(stderr, "\n  Usage: %s [OPTIONS] -f <fastq_R1> -r <fastq_R2> -b <barcode_file>\
+                     \n\
+                     \n\
+                     \n  Options:\
+                     \n\
+                     \n    Required:\
+                     \n\
+                     \n        -f, --fq1 FILE                Input FASTQ R1 read\
+                     \n        -r, --fq2 FILE                Input FASTQ R2 reads\
+                     \n        -b, --barcodes FILE           Barcodes files, one barcode per line, e.g BC\\tPREFIX\
+                     \n        -w, --unassigned CHAR         Unassigned prefix\
+                     \n\
+                     \n    Other:\
+                     \n\
+                     \n        -c, --combine                 Combine R1 and R2 [NULL]\
+                     \n        -u, --umi                     Indicates that umi present in the R1 read [NULL]\
+                     \n        -m, --max-mismatch INT        Maximum number of mismatches allowed in a barcode [0]\
+                     \n        -l, --min-umi-len INT         Minimum UMI length to keep [0]\
+                     \n        -a, --max-5prime-crop INT     Maximum number of possible bases cropped from 5prime [0]\
+                     \n        -n, --no-comment              Drop extra comments from FASTQ header [NULL]\
+                     \n        -g, --gz-out                  gzip the output files (requires pigz or gzip in PATH)\
+                     \n        -s, --stats FILE              Write stats to file instead of STDOUT [STDOUT]\
+                     \n\
+                     \n    Extras:\
+                     \n\
+                     \n        -t, --threads INT             specify number of threads to use [4]\
+                     \n        -v, --version                 get current version\
+                     \n        -h, --help                    get help menu, exit status is zero\
+                     \n        -o, --story                   little story about sabre tool\
+                     \n\
+                     \n",
+                     PROGRAM_NAME);
+
+    exit(status);
+}
+
+void little_story(int status) {
+
+    fprintf(stdout, "\n\
+                     \n  Little story:\
+                     \n\
+                     \n        Sabre is a heavy cavalry sword with a curved blade and a single cutting edge\
+                     \n        Not sure though if the meaning was intended by original author...\
+                     \n\
+                     \n        Later on I was pointed out to me that yes of course it was intended\
+                     \n        since we are cutting off adaptors..\
+                     \n\
+                     \n        to be continued...\
+                     \n\
+                     \n");
+
+    exit(status);
+}
+
+void version(int status) {
+
+    fprintf(stdout, "\n\
+                     \n   %s\
+                     \n\
+                     \n   version: %s\
+                     \n\
+                     \n   Copyright (c) 2011 The Regents of University of California, Davis Campus.\
+                     \n   %s is free software and comes with ABSOLUTELY NO WARRANTY.\
+                     \n   Distributed under the MIT License.\
+                     \n\
+                     \n   Written by: %s\
+                     \n\
+                     \n",
+                     PROGRAM_NAME,
+                     VERSION,
+                     PROGRAM_NAME,
+                     AUTHORS);
+
+    exit(status);
+}
diff --git a/src/usage.h b/src/usage.h
new file mode 100644
index 0000000..4ab8f3f
--- /dev/null
+++ b/src/usage.h
@@ -0,0 +1,10 @@
+#ifndef _USAGE_H
+#define _USAGE_H
+
+#include "sabre.h"
+
+void usage(int status);
+void little_story(int status);
+void version(int status);
+
+#endif /*_USAGE_H*/
diff --git a/src/utils.c b/src/utils.c
new file mode 100644
index 0000000..d5f75c7
--- /dev/null
+++ b/src/utils.c
@@ -0,0 +1,251 @@
+#include "utils.h"
+
+// https://stackoverflow.com/questions/2336242/recursive-mkdir-system-call-on-unix/11425692
+// https://stackoverflow.com/questions/7430248/creating-a-new-directory-in-c
+const char * _mkdir(const char *file_path) {
+
+    if(!file_path) {
+        fprintf (stderr,
+                "ERROR: This shouldn't happend, file path == %s\n",
+                file_path);
+        exit(EXIT_FAILURE);
+    }
+
+    // return straigth away if a file_path is not nested file path
+    if(strstr(file_path, "/") == NULL) {
+        return file_path;
+    }
+    //TODO check if directory already exists or not
+    //struct stat st = {0};
+    //
+    //if (stat(dir, &st) == -1) {
+    //    mkdir(tmp, S_IRWXU);
+    //}
+
+    //char tmp[PATH_MAX]; // can't get this to work..
+    char tmp[256];
+    char *p = NULL;
+    char *dirc = strdup(file_path);
+    size_t len;
+
+    const char *dir = dirname(dirc);
+    snprintf(tmp, sizeof(tmp),"%s", dir);
+    len = strlen(tmp);
+
+    if(tmp[len - 1] == '/') {
+        tmp[len - 1] = 0;
+    }
+
+    for(p = tmp + 1; *p; p++) {
+        if(*p == '/') {
+            *p = 0;
+            mkdir(tmp, S_IRWXU);
+            *p = '/';
+        }
+    }
+
+    mkdir(tmp, S_IRWXU);
+    free(dirc);
+
+    return file_path;
+}
+
+//NOTE retuns zero on success
+//strcmp can be used for sorting, returns pos, zero, neg
+//BUT this new implementation can't be used as such just FYI
+match_ret_t chk_bc_mtch(const char *orig_bc, const char *orig_read, size_t max_mismatch, int max_5prime_crop) {
+    int orig_read_len = strlen(orig_read);
+    int orig_bc_len = strlen(orig_bc);
+    int n_crop = 0;
+    match_ret_t ret = { -1,-1 };
+    if(orig_bc_len > orig_read_len) {
+        fprintf (stderr,
+                "WARNING: Length of the barcode %d is greater than length of the reads %d.",
+                orig_bc_len, orig_read_len);
+        return ret;
+    }
+
+    while(n_crop <= max_5prime_crop) {
+
+        if(n_crop > orig_read_len) {
+            return ret;
+        }
+
+        int mismatch = 0;
+        char u1, u2;
+        const char *bc = orig_bc;
+        const char *read = orig_read+n_crop;
+        int bc_len = orig_bc_len;
+
+        while (bc_len-- > 0) {
+            u1 = *bc++;
+            u2 = *read++;
+
+            if (u1 != u2) {
+                mismatch++;
+                if (mismatch > max_mismatch) {
+                    break;
+                }
+            }
+
+            if (u1 == '\0' || u2 == '\0') {
+                ret.cropped = n_crop;
+                ret.mismatches = mismatch;
+                return ret;
+            }
+        }
+
+        if(mismatch <= max_mismatch) {
+            ret.cropped = n_crop;
+            ret.mismatches = mismatch;
+            return ret;
+        }
+
+        n_crop++;
+    }
+    //this is in the case of error
+    return ret;
+}
+
+// https://stackoverflow.com/questions/21880730/c-what-is-the-best-and-fastest-way-to-concatenate-strings
+//TODO this is a fastq mystrcat function, that returns a pointer to the end of the string
+void get_fqread(char *fqread, fq_rec_t *fq_rec, const char *barcode, char *umi_idx, int no_comment, int n_crop) {
+
+    fqread[0] = '\0';
+
+    if(n_crop < 0) {
+        fprintf(stderr,
+                "ERROR: n_crop set to %d. This can't happend\n",
+                n_crop);
+        exit(EXIT_FAILURE);
+    }
+
+    //@READNAME:BACRCODE:UMI
+    //1st line
+    strcat(fqread, fq_rec->name);
+    //TODO later can have conditional here depending on the the structure and/or BARCODE/UMI
+    if(barcode) {
+        strcat(fqread, ":");
+        strcat(fqread, barcode);
+    }
+    else if(!barcode) {
+        barcode = "";
+    }
+
+    if(umi_idx) {
+        strcat(fqread, ":");
+        strcat(fqread, umi_idx);
+    }
+
+    if(fq_rec->comment && no_comment == -1) {
+        strcat(fqread, " ");
+        strcat(fqread, fq_rec->comment);
+    }
+    strcat(fqread, "\n");
+
+    //2nd line
+    strcat(fqread, (fq_rec->seq)+strlen(barcode)+n_crop);
+    strcat(fqread, "\n");
+
+    //3rd line
+    strcat(fqread, "+");
+    strcat(fqread, fq_rec->name);
+    if(fq_rec->comment && no_comment == -1) {
+        strcat(fqread, " ");
+        strcat(fqread, fq_rec->comment);
+    }
+    strcat(fqread, "\n");
+
+    //4th line
+    strcat(fqread, (fq_rec->qual)+strlen(barcode)+n_crop);
+    strcat(fqread, "\n");
+}
+
+void get_merged_fqread(char *fqread, fq_rec_t *fq_rec1, fq_rec_t *fq_rec2, const char *barcode, char *umi_idx, int no_comment, int n_crop) {
+    fqread[0] = '\0';
+    //@READNAME:BACRCODE:UMI
+    //1st line
+    strcat(fqread, fq_rec1->name);
+    //TODO later can have conditional here depending on the the structure and/or BARCODE/UMI
+    if(barcode) {
+        strcat(fqread, ":");
+        strcat(fqread, barcode);
+    }
+
+    if(umi_idx) {
+        strcat(fqread, ":");
+        strcat(fqread, umi_idx);
+    }
+
+    if(fq_rec1->comment && no_comment == -1) {
+        strcat(fqread, " ");
+        strcat(fqread, fq_rec1->comment);
+    }
+    strcat(fqread, "\n");
+
+    //2nd line
+    strcat(fqread, fq_rec2->seq);
+    strcat(fqread, "\n");
+
+    //3rd line
+    strcat(fqread, "+");
+    strcat(fqread, "");
+    //strcat(fqread, fq_rec2->name);
+    //if(fq_rec2->comment && no_comment == -1) {
+    //    strcat(fqread, " ");
+    //    strcat(fqread, fq_rec2->comment);
+    //}
+    strcat(fqread, "\n");
+
+    //4th line
+    strcat(fqread, (fq_rec2->qual));
+    strcat(fqread, "\n");
+}
+
+void get_bc_fn(char **bcout_fn, char *s_name, char *barcode, int read_type, int gz) {
+
+    if(strlen(s_name) > MAX_FILENAME_LENGTH) {
+        fprintf (stderr,
+                "ERROR: Too many characters in your sample name; %s:%zd \n",
+                s_name, strlen(s_name));
+        exit(EXIT_FAILURE);
+    }
+
+    strcat(*bcout_fn, s_name);
+    strcat(*bcout_fn, "_");
+    strcat(*bcout_fn, barcode);
+
+    if(read_type == 1) {
+        strcat(*bcout_fn, "_R1.fastq");
+    }
+    else if(read_type == 2) {
+        strcat(*bcout_fn, "_R2.fastq");
+    }
+    else {
+        fprintf (stderr,
+                "ERROR: This shouldn't happen, wrong read type was passed through -> %d\n",
+                read_type);
+        exit(EXIT_FAILURE);
+    }
+    if (gz) {
+        strcat(*bcout_fn, ".gz");
+    }
+}
+
+void set_default_params(param_t *params) {
+    params->mismatch = 0;
+    params->combine = -1;
+    params->umi = -1;
+    params->paired = -1;
+    params->min_umi_len = 0;
+    params->max_5prime_crop = 0;
+    params->no_comment = -1;
+}
+
+void params_destroy(param_t *params) {
+    gzclose(params->fq1_fd);
+    gzclose(params->fq2_fd);
+    fclose(params->unassigned1_fd);
+    fclose(params->unassigned2_fd);
+    fclose(params->umis_2_short_fd);
+}
diff --git a/src/utils.h b/src/utils.h
new file mode 100644
index 0000000..ca32703
--- /dev/null
+++ b/src/utils.h
@@ -0,0 +1,37 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include "sabre.h"
+#include "fastq.h"
+
+typedef struct barcodes_t {
+    char *bc;
+    int cnts;
+} barcodes_t;
+
+typedef struct umis_t {
+    char *bc;
+    int len;
+    int cnts;
+} umis_t;
+
+typedef struct {
+    int mismatches;
+    int cropped;
+} match_ret_t;
+
+//This is needed if compilling with -std=c99, read below for more
+//https://stackoverflow.com/questions/26284110/strdup-confused-about-warnings-implicit-declaration-makes-pointer-with
+// char *strdup(const char*);
+// char *strndup(const char *s, size_t n);
+
+const char * _mkdir (const char *dir);
+match_ret_t chk_bc_mtch(const char *orig_bc, const char *orig_read, size_t max_mismatch, int max_5prime_crop);
+void get_fqread(char *fqread, fq_rec_t *fq_rec, const char *barcode, char *umi_idx, int no_comment, int n_crop);
+void get_merged_fqread(char *fqread, fq_rec_t *fq_rec1, fq_rec_t *fq_rec2, const char *barcode, char *umi_idx, int no_comment, int n_crop);
+void get_bc_fn(char **bcout_fn, char *s_name, char *barcode, int read_type, int gz);
+
+void set_default_params(param_t *params);
+void params_destroy(param_t *params);
+
+#endif /*UTILS_H*/